LatinIME/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
Keisuke Kuroyanagi d9b8602f48 Implement getMaxProbabilityOfExactMatches().
Bug: 13142176
Bug: 15428247
Change-Id: I5be6d683be95505412615ca7c88260de1ea05f54
2014-06-06 17:37:46 +09:00

709 lines
33 KiB
C++

/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
#include "com_android_inputmethod_latin_BinaryDictionary.h"
#include <cstring> // for memset()
#include <vector>
#include "defines.h"
#include "jni.h"
#include "jni_common.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/result/suggestion_results.h"
#include "suggest/core/session/prev_words_info.h"
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
#include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h"
namespace latinime {
class ProximityInfo;
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
PROF_OPEN;
PROF_START(66);
const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
if (sourceDirUtf8Length <= 0) {
AKLOGE("DICT: Can't get sourceDir string");
return 0;
}
char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0';
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE));
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
PROF_END(66);
PROF_CLOSE;
return reinterpret_cast<jlong>(dictionary);
}
static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
jobjectArray attributeValueStringArray) {
const jsize localeUtf8Length = env->GetStringUTFLength(locale);
char localeChars[localeUtf8Length + 1];
env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
localeChars[localeUtf8Length] = '\0';
std::vector<int> localeCodePoints;
HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
const int keyCount = env->GetArrayLength(attributeKeyStringArray);
const int valueCount = env->GetArrayLength(attributeValueStringArray);
if (keyCount != valueCount) {
return false;
}
DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
attributeValueStringArray);
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
formatVersion, localeCodePoints, &attributeMap);
if (!dictionaryStructureWithBufferPolicy) {
return 0;
}
Dictionary *const dictionary =
new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
return reinterpret_cast<jlong>(dictionary);
}
static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flush(filePathChars);
}
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
jstring filePath) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
char filePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
filePathChars[filePathUtf8Length] = '\0';
return dictionary->flushWithGC(filePathChars);
}
static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
delete dictionary;
}
static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
jobject outAttributeValues) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
headerPolicy->getFormatVersionNumber());
// Output attribute map
jclass arrayListClass = env->FindClass("java/util/ArrayList");
jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
headerPolicy->getAttributeMap();
for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
it != attributeMap->end(); ++it) {
// Output key
jintArray keyCodePointArray = env->NewIntArray(it->first.size());
JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
it->first.size(), it->first.data(), it->first.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
env->DeleteLocalRef(keyCodePointArray);
// Output value
jintArray valueCodePointArray = env->NewIntArray(it->second.size());
JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
it->second.size(), it->second.data(), it->second.size(),
false /* needsNullTermination */);
env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
env->DeleteLocalRef(valueCodePointArray);
}
env->DeleteLocalRef(arrayListClass);
return;
}
static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
return headerPolicy->getFormatVersionNumber();
}
static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
jintArray outSpaceIndicesArray, jintArray outTypesArray,
jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
if (!dictionary) {
return;
}
ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
DicTraverseSession *traverseSession =
reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
if (!traverseSession) {
return;
}
// Input values
int xCoordinates[inputSize];
int yCoordinates[inputSize];
int times[inputSize];
int pointerIds[inputSize];
const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
int inputCodePoints[inputCodePointsLength];
const jsize prevWordCodePointsLength =
prevWordCodePointsForBigrams ? env->GetArrayLength(prevWordCodePointsForBigrams) : 0;
int prevWordCodePointsInternal[prevWordCodePointsLength];
int *prevWordCodePoints = nullptr;
env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
env->GetIntArrayRegion(timesArray, 0, inputSize, times);
env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
if (prevWordCodePointsForBigrams) {
env->GetIntArrayRegion(prevWordCodePointsForBigrams, 0, prevWordCodePointsLength,
prevWordCodePointsInternal);
prevWordCodePoints = prevWordCodePointsInternal;
}
const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
int options[numberOfOptions];
env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
SuggestOptions givenSuggestOptions(options, numberOfOptions);
// Output values
/* By the way, let's check the output array length here to make sure */
const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
ASSERT(false);
return;
}
const jsize scoresLength = env->GetArrayLength(outScoresArray);
if (scoresLength != MAX_RESULTS) {
AKLOGE("Invalid scoresLength: %d", scoresLength);
ASSERT(false);
return;
}
const jsize outputAutoCommitFirstWordConfidenceLength =
env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
if (outputAutoCommitFirstWordConfidenceLength != 1) {
// We only use the first result, as obviously we will only ever autocommit the first one
AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
outputAutoCommitFirstWordConfidenceLength);
ASSERT(false);
return;
}
float languageWeight;
env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
SuggestionResults suggestionResults(MAX_RESULTS);
const PrevWordsInfo prevWordsInfo(prevWordCodePoints, prevWordCodePointsLength,
false /* isStartOfSentence */);
if (givenSuggestOptions.isGesture() || inputSize > 0) {
// TODO: Use SuggestionResults to return suggestions.
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
&givenSuggestOptions, languageWeight, &suggestionResults);
} else {
dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
}
suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
outScoresArray, outSpaceIndicesArray, outTypesArray,
outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight);
}
static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
return dictionary->getProbability(codePoints, wordLength);
}
static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return NOT_A_PROBABILITY;
const jsize wordLength = env->GetArrayLength(word);
int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints);
return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
}
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
const jsize word1Length = env->GetArrayLength(word1);
int word0CodePoints[word0Length];
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
}
// Method to iterate all words in the dictionary for makedict.
// If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
// the dictionary does not have a next word.
static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
jlong dict, jint token, jintArray outCodePoints) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return 0;
const jsize outCodePointsLength = env->GetArrayLength(outCodePoints);
if (outCodePointsLength != MAX_WORD_LENGTH) {
AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength);
ASSERT(false);
return 0;
}
int wordCodePoints[outCodePointsLength];
memset(wordCodePoints, 0, sizeof(wordCodePoints));
const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints);
JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
MAX_WORD_LENGTH /* maxLength */, wordCodePoints, outCodePointsLength,
false /* needsNullTermination */);
return nextToken;
}
static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo,
jobject outShortcutTargets, jobject outShortcutProbabilities) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return;
const jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
outShortcutProbabilities);
}
static bool latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize codePointCount = env->GetArrayLength(word);
int codePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
static bool latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize word0Length = env->GetArrayLength(word0);
int word0CodePoints[word0Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const std::vector<int> bigramTargetCodePoints(
word1CodePoints, word1CodePoints + word1Length);
// Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
timestamp, 0 /* level */, 1 /* count */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}
static bool latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
jsize word0Length = env->GetArrayLength(word0);
int word0CodePoints[word0Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
}
// Returns how many language model params are processed.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
jlong dict, jobjectArray languageModelParams, jint startIndex) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return 0;
}
jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
return 0;
}
jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
env->DeleteLocalRef(languageModelParam);
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
jfieldID unigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
jfieldID bigramProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
jfieldID timestampFieldId =
env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
jfieldID shortcutTargetFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
jfieldID shortcutProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isBlacklistedFieldId =
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) {
jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
// languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
// other hand, word0 can be null and then it means the set of params doesn't contain bigram
// information.
jintArray word0 = static_cast<jintArray>(
env->GetObjectField(languageModelParam, word0FieldId));
jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
int word0CodePoints[word0Length];
if (word0) {
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
}
jintArray word1 = static_cast<jintArray>(
env->GetObjectField(languageModelParam, word1FieldId));
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
std::vector<int> shortcutTargetCodePoints;
JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
if (!shortcutTargetCodePoints.empty()) {
jint shortcutProbability =
env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
&shortcuts);
dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
const std::vector<int> bigramTargetCodePoints(
word1CodePoints, word1CodePoints + word1Length);
// Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
timestamp, 0 /* level */, 1 /* count */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1;
}
env->DeleteLocalRef(word0);
env->DeleteLocalRef(word1);
env->DeleteLocalRef(shortcutTarget);
env->DeleteLocalRef(languageModelParam);
}
return languageModelParamCount;
}
static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
jstring query) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return env->NewStringUTF("");
}
const jsize queryUtf8Length = env->GetStringUTFLength(query);
char queryChars[queryUtf8Length + 1];
env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
queryChars[queryUtf8Length] = '\0';
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}
static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
return dictionary->getDictionaryStructurePolicy()->isCorrupted();
}
static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
const char *const dictFilePath) {
structurePolicy->flushWithGC(dictFilePath);
structurePolicy.release();
return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
}
static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
jstring dictFilePath, jlong newFormatVersion) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return false;
}
const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
char dictFilePathChars[filePathUtf8Length + 1];
env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
dictFilePathChars[filePathUtf8Length] = '\0';
const DictionaryHeaderStructurePolicy *const headerPolicy =
dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot migrate header.");
return false;
}
// TODO: Migrate historical information.
int wordCodePoints[MAX_WORD_LENGTH];
int token = 0;
// Add unigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength,
wordProperty.getUnigramProperty())) {
LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
return false;
}
} while (token != 0);
// Add bigrams.
do {
token = dictionary->getNextWordAndNextToken(token, wordCodePoints);
const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints);
const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength);
if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
if (!dictionaryStructureWithBufferPolicy) {
LogUtils::logToJava(env, "Cannot open dict after GC.");
return false;
}
}
const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength,
false /* isStartOfSentence */);
for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
&bigramProperty)) {
LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
return false;
}
}
} while (token != 0);
// Save to File.
dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
return true;
}
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("openNative"),
const_cast<char *>("(Ljava/lang/String;JJZ)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_open)
},
{
const_cast<char *>("createOnMemoryNative"),
const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
},
{
const_cast<char *>("closeNative"),
const_cast<char *>("(J)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_close)
},
{
const_cast<char *>("getFormatVersionNative"),
const_cast<char *>("(J)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
},
{
const_cast<char *>("getHeaderInfoNative"),
const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
},
{
const_cast<char *>("flushNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
},
{
const_cast<char *>("needsToRunGCNative"),
const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{
const_cast<char *>("flushWithGCNative"),
const_cast<char *>("(JLjava/lang/String;)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
},
{
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
const_cast<char *>("getProbabilityNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
},
{
const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
const_cast<char *>("(J[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
},
{
const_cast<char *>("getBigramProbabilityNative"),
const_cast<char *>("(J[IZ[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
const_cast<char *>("getWordPropertyNative"),
const_cast<char *>("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
"Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
},
{
const_cast<char *>("getNextWordNative"),
const_cast<char *>("(JI[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
},
{
const_cast<char *>("addUnigramWordNative"),
const_cast<char *>("(J[II[IIZZZI)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
const_cast<char *>("(J[IZ[III)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[IZ[I)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
},
{
const_cast<char *>("addMultipleDictionaryEntriesNative"),
const_cast<char *>(
"(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
},
{
const_cast<char *>("getPropertyNative"),
const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
},
{
const_cast<char *>("isCorruptedNative"),
const_cast<char *>("(J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
},
{
const_cast<char *>("migrateNative"),
const_cast<char *>("(JLjava/lang/String;J)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
}
};
int register_BinaryDictionary(JNIEnv *env) {
const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
}
} // namespace latinime