diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 6e2219d87..c2cd2addd 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -304,17 +304,18 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, jlong dict, jint token, jintArray outCodePoints) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) return 0; - const jsize outCodePointsLength = env->GetArrayLength(outCodePoints); - if (outCodePointsLength != MAX_WORD_LENGTH) { - AKLOGE("Invalid outCodePointsLength: %d", outCodePointsLength); + const jsize codePointBufSize = env->GetArrayLength(outCodePoints); + if (codePointBufSize != MAX_WORD_LENGTH) { + AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize); ASSERT(false); return 0; } - int wordCodePoints[outCodePointsLength]; - memset(wordCodePoints, 0, sizeof(wordCodePoints)); - const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints); + int wordCodePoints[codePointBufSize]; + int wordCodePointCount = 0; + const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints, + &wordCodePointCount); JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, - MAX_WORD_LENGTH /* maxLength */, wordCodePoints, outCodePointsLength, + MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount, false /* needsNullTermination */); return nextToken; } @@ -555,12 +556,13 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j // TODO: Migrate historical information. int wordCodePoints[MAX_WORD_LENGTH]; + int wordCodePointCount = 0; int token = 0; // Add unigrams. do { - token = dictionary->getNextWordAndNextToken(token, wordCodePoints); - const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints); - const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, + wordCodePointCount); if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); @@ -569,8 +571,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength, - wordProperty.getUnigramProperty())) { + if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, + wordCodePointCount, wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; } @@ -578,9 +580,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j // Add bigrams. do { - token = dictionary->getNextWordAndNextToken(token, wordCodePoints); - const int wordLength = CharUtils::getCodePointCount(MAX_WORD_LENGTH, wordCodePoints); - const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, + wordCodePointCount); if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); @@ -589,8 +591,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength, - false /* isStartOfSentence */); + const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount, + false /* isBeginningOfSentence */); for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, &bigramProperty)) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 0bcde2294..228260216 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -145,10 +145,11 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints, codePoints, codePointCount); } -int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints) { +int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { TimeKeeper::setCurrentTime(); return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken( - token, outCodePoints); + token, outCodePoints, outCodePointCount); } void Dictionary::logDictionaryInfo(JNIEnv *const env) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 542ba7291..247ee2421 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -103,7 +103,8 @@ class Dictionary { // Method to iterate all words in the dictionary. // The returned token has to be used to get the next word. If token is 0, this method newly // starts iterating the dictionary. - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { return mDictionaryStructureWithBufferPolicy.get(); diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index e2771f97c..b72601109 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -104,7 +104,8 @@ class DictionaryStructureWithBufferPolicy { // Method to iterate all words in the dictionary. // The returned token has to be used to get the next word. If token is 0, this method newly // starts iterating the dictionary. - virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; + virtual int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) = 0; virtual bool isCorrupted() const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 4ac0f406e..9780ae048 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -478,10 +478,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { - // TODO: Return code point count like other methods. - // Null termination. - outCodePoints[0] = 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( @@ -498,13 +497,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); - if (codePointCount < MAX_WORD_LENGTH) { - // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH - // code points. - outCodePoints[codePointCount] = 0; - } const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 2e948ac4a..16b1bd2c1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -134,7 +134,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 7e1f3b233..5c62b9caf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -391,7 +391,9 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { +int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { // Start iterating the dictionary. mTerminalPtNodePositionsForIteratingWords.clear(); @@ -409,8 +411,8 @@ int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outC } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, MAX_WORD_LENGTH, - outCodePoints, &unigramProbability); + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, + MAX_WORD_LENGTH, outCodePoints, &unigramProbability); const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index dce94363a..ec8407408 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -137,7 +137,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index f7f2a32b4..46107d92a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -489,10 +489,9 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code return WordProperty(&codePointVector, &unigramProperty, &bigrams); } -int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { - // TODO: Return code point count like other methods. - // Null termination. - outCodePoints[0] = 0; +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount) { + *outCodePointCount = 0; if (token == 0) { mTerminalPtNodePositionsForIteratingWords.clear(); DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( @@ -509,13 +508,8 @@ int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const } const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; int unigramProbability = NOT_A_PROBABILITY; - const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); - if (codePointCount < MAX_WORD_LENGTH) { - // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH - // code points. - outCodePoints[codePointCount] = 0; - } const int nextToken = token + 1; if (nextToken >= terminalPtNodePositionsVectorSize) { // All words have been iterated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 0a20965f3..5d66a2cce 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -113,7 +113,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const WordProperty getWordProperty(const int *const codePoints, const int codePointCount) const; - int getNextWordAndNextToken(const int token, int *const outCodePoints); + int getNextWordAndNextToken(const int token, int *const outCodePoints, + int *const outCodePointCount); bool isCorrupted() const { return mIsCorrupted;