LatinIME/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
Keisuke Kuroyanagi c9865785f4 Support ngram entry migration.
Bug: 14425059
Change-Id: I98cb9fa303af2d93a0a3512e8732231c564e3c5d
2014-10-22 11:31:16 +09:00

782 lines
36 KiB
C++

/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cstring> // for memset()
#include <vector>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/ngram_context.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
#include "utils/int_array_view.h"
#include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h"
namespace latinime {
class ProximityInfo;
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
return 0;
}
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE));
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
}
static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
jobjectArray attributeValueStringArray) {
const jsize localeUtf8Length = env->GetStringUTFLength(locale);
char localeChars[localeUtf8Length + 1];
env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
localeChars[localeUtf8Length] = '\0';
std::vector<int> localeCodePoints;
HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
const int keyCount = env->GetArrayLength(attributeKeyStringArray);
const int valueCount = env->GetArrayLength(attributeValueStringArray);
if (keyCount != valueCount) {
return false;
}
DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
attributeValueStringArray);
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
formatVersion, localeCodePoints, &attributeMap);
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
return reinterpret_cast<jlong>(dictionary);
}
static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flush(filePathChars);
}
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flushWithGC(filePathChars);
}
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
delete dictionary;
}
static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
jobject outAttributeValues) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
headerPolicy->getFormatVersionNumber());
// Output attribute map
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
headerPolicy->getAttributeMap();
for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
it != attributeMap->end(); ++it) {
// Output key
jintArray keyCodePointArray = env->NewIntArray(it->first.size());
JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
it->first.size(), it->first.data(), it->first.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
env->DeleteLocalRef(keyCodePointArray);
// Output value
jintArray valueCodePointArray = env->NewIntArray(it->second.size());
JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
it->second.size(), it->second.data(), it->second.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
env->DeleteLocalRef(valueCodePointArray);
}
env->DeleteLocalRef(arrayListClass);
return;
}
static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
return headerPolicy->getFormatVersionNumber();
}
static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jint prevWordCount, jintArray outSuggestionCount, jintArray outCodePointsArray,
jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
jintArray outAutoCommitFirstWordConfidenceArray,
jfloatArray inOutWeightOfLangModelVsSpatialModel) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
if (!dictionary) {
return;
}
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
DicTraverseSession *traverseSession =
reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
if (!traverseSession) {
return;
}
// Input values
int xCoordinates[inputSize];
int yCoordinates[inputSize];
int times[inputSize];
int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
int options[numberOfOptions];
env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
SuggestOptions givenSuggestOptions(options, numberOfOptions);
// Output values
/* By the way, let's check the output array length here to make sure */
const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
ASSERT(false);
return;
}
const jsize scoresLength = env->GetArrayLength(outScoresArray);
if (scoresLength != MAX_RESULTS) {
AKLOGE("Invalid scoresLength: %d", scoresLength);
ASSERT(false);
return;
}
const jsize outputAutoCommitFirstWordConfidenceLength =
env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
if (outputAutoCommitFirstWordConfidenceLength != 1) {
// We only use the first result, as obviously we will only ever autocommit the first one
AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
outputAutoCommitFirstWordConfidenceLength);
ASSERT(false);
return;
}
float weightOfLangModelVsSpatialModel;
env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
&weightOfLangModelVsSpatialModel);
SuggestionResults suggestionResults(MAX_RESULTS);
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, inputSize, &ngramContext,
&givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
} else {
dictionary->getPredictions(&ngramContext, &suggestionResults);
}
if (DEBUG_DICT) {
suggestionResults.dumpSuggestions();
}
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
outScoresArray, outSpaceIndicesArray, outTypesArray,
outAutoCommitFirstWordConfidenceArray, inOutWeightOfLangModelVsSpatialModel);
}
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(
CodePointArrayView(codePoints, codePointCount));
}
static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
return dictionary->getNgramProbability(&ngramContext,
CodePointArrayView(wordCodePoints, wordLength));
}
// Method to iterate all words in the dictionary for makedict.
// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
// the dictionary does not have a next word.
static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
if (codePointBufSize != MAX_WORD_LENGTH) {
AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
ASSERT(false);
return 0;
}
int wordCodePoints[codePointBufSize];
int wordCodePointCount = 0;
const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
&wordCodePointCount);
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
false /* needsNullTermination */);
bool isBeginningOfSentence = false;
if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
isBeginningOfSentence = true;
}
JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
isBeginningOfSentence);
return nextToken;
}
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
jbooleanArray outFlags, jintArray outProbabilityInfo, jobject /* outNgramPrevWordsArray */,
jobject /* outNgramPrevWordIsBeginningOfSentenceArray */, jobject outNgramTargets,
jobject outNgramProbabilityInfo, jobject outShortcutTargets,
jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
if (wordLength > MAX_WORD_LENGTH) {
AKLOGE("Invalid wordLength: %d", wordLength);
return;
}
int wordCodePoints[MAX_WORD_LENGTH];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
int codePointCount = wordLength;
if (isBeginningOfSentence) {
codePointCount = CharUtils::attachBeginningOfSentenceMarker(
wordCodePoints, wordLength, MAX_WORD_LENGTH);
if (codePointCount < 0) {
AKLOGE("Cannot attach Beginning-of-Sentence marker.");
return;
}
}
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, codePointCount));
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
}
static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
{
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
}
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */,
1 /* count */), std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount));
}
static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word, jint probability, jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
// Use 1 for count to indicate the ngram has inputted.
const NgramProperty ngramProperty(ngramContext,
CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
return dictionary->addNgramEntry(&ngramProperty);
}
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
return dictionary->removeNgramEntry(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount));
}
static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env,
jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays,
jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
historicalInfo);
}
// Returns how many language model params are processed.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray languageModelParams, jint startIndex) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return 0;
}
jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
return 0;
}
jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
env->DeleteLocalRef(languageModelParam);
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
jfieldID unigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
jfieldID bigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
jfieldID timestampFieldId =
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
jfieldID shortcutTargetFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
jfieldID shortcutProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isPossiblyOffensiveFieldId =
env->GetFieldID(languageModelParamClass, "mIsPossiblyOffensive", "Z");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
// languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
// other hand, word0 can be null and then it means the set of params doesn't contain bigram
// information.
jintArray word0 = static_cast<jintArray>(
env->GetObjectField(languageModelParam, word0FieldId));
jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
int word0CodePoints[word0Length];
if (word0) {
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
}
jintArray word1 = static_cast<jintArray>(
env->GetObjectField(languageModelParam, word1FieldId));
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isPossiblyOffensive = env->GetBooleanField(languageModelParam,
isPossiblyOffensiveFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
{
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
jint shortcutProbability =
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability);
}
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isPossiblyOffensive, unigramProbability,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
// Use 1 for count to indicate the bigram has inputted.
const NgramContext ngramContext(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
const NgramProperty ngramProperty(ngramContext,
CodePointArrayView(word1CodePoints, word1Length).toVector(),
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
dictionary->addNgramEntry(&ngramProperty);
}
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;
}
env->DeleteLocalRef(word0);
env->DeleteLocalRef(word1);
env->DeleteLocalRef(shortcutTarget);
env->DeleteLocalRef(languageModelParam);
}
return languageModelParamCount;
}
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
jstring query) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return env->NewStringUTF("");
}
const jsize queryUtf8Length = env->GetStringUTFLength(query);
char queryChars[queryUtf8Length + 1];
env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
queryChars[queryUtf8Length] = '\0';
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}
static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
return dictionary->getDictionaryStructurePolicy()->isCorrupted();
}
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
const char *const dictFilePath) {
structurePolicy->flushWithGC(dictFilePath);
structurePolicy.release();
return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
}
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
jstring dictFilePath, jlong newFormatVersion) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
char dictFilePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
dictFilePathChars[filePathUtf8Length] = '\0';
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot migrate header.");
return false;
}
int wordCodePoints[MAX_WORD_LENGTH];
int wordCodePointCount = 0;
int token = 0;
// Add unigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
// Skip beginning-of-sentence unigram.
continue;
}
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
CodePointArrayView(wordCodePoints, wordCodePointCount),
wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
} while (token != 0);
// Add ngrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
const WordProperty wordProperty = dictionary->getWordProperty(
CodePointArrayView(wordCodePoints, wordCodePointCount));
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false;
}
}
} while (token != 0);
// Save to File.
dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
return true;
}
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
const_cast<char *>("(Ljava/lang/String;JJZ)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_open)
},
{
const_cast<char *>("createOnMemoryNative"),
const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
},
{
const_cast<char *>("closeNative"),
const_cast<char *>("(J)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
const_cast<char *>("getFormatVersionNative"),
const_cast<char *>("(J)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
},
{
const_cast<char *>("getHeaderInfoNative"),
const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
},
{
const_cast<char *>("flushNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
},
{
const_cast<char *>("needsToRunGCNative"),
const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{
const_cast<char *>("flushWithGCNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
},
{
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[ZI[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
const_cast<char *>("getProbabilityNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
},
{
const_cast<char *>("getNgramProbabilityNative"),
const_cast<char *>("(J[[I[Z[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
},
{
const_cast<char *>("getWordPropertyNative"),
const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("getNextWordNative"),
const_cast<char *>("(JI[I[Z)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{
const_cast<char *>("addUnigramEntryNative"),
const_cast<char *>("(J[II[IIZZZI)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
},
{
const_cast<char *>("removeUnigramEntryNative"),
const_cast<char *>("(J[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
},
{
const_cast<char *>("addNgramEntryNative"),
const_cast<char *>("(J[[I[Z[III)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
},
{
const_cast<char *>("removeNgramEntryNative"),
const_cast<char *>("(J[[I[Z[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
},
{
const_cast<char *>("updateEntriesForWordWithNgramContextNative"),
const_cast<char *>("(J[[I[Z[IZII)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext)
},
{
const_cast<char *>("addMultipleDictionaryEntriesNative"),
const_cast<char *>(
"(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
},
{
const_cast<char *>("getPropertyNative"),
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
},
{
const_cast<char *>("isCorruptedNative"),
const_cast<char *>("(J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
},
{
const_cast<char *>("migrateNative"),
const_cast<char *>("(JLjava/lang/String;J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
}
};
int register_BinaryDictionary(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime