From 9f8c9a0161924f515c5ff9617db2317cdc1d01e2 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 21 May 2014 18:30:34 +0900 Subject: [PATCH] Use PrevWordsInfo to add/remove n(bi)-gram in native code. Bug: 14119293 Bug: 14425059 Change-Id: I4b9a46bfd670b35195418eaee51456d44fb91b6d --- ...oid_inputmethod_latin_BinaryDictionary.cpp | 23 ++++--- .../jni/src/suggest/core/dicnode/dic_node.h | 4 +- .../suggest/core/dictionary/dictionary.cpp | 20 +++--- .../src/suggest/core/dictionary/dictionary.h | 10 +-- .../dictionary/property/bigram_property.h | 1 + .../dictionary_structure_with_buffer_policy.h | 9 +-- .../core/session/dic_traverse_session.cpp | 2 +- .../suggest/core/session/prev_words_info.h | 36 +++++++++-- .../v401/ver4_patricia_trie_policy.cpp | 15 +++-- .../backward/v401/ver4_patricia_trie_policy.h | 8 +-- ...y_structure_with_buffer_policy_factory.cpp | 5 +- .../structure/v2/patricia_trie_policy.h | 14 ++--- .../v4/ver4_patricia_trie_policy.cpp | 63 +++++++++++-------- .../structure/v4/ver4_patricia_trie_policy.h | 6 +- 14 files changed, 135 insertions(+), 81 deletions(-) diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 28aaf2d1a..6223f86f4 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -343,7 +343,7 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the word has inputted. 
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); - dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty); + dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); } static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -363,7 +363,9 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, timestamp, 0 /* level */, 1 /* count */); - dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, @@ -378,8 +380,9 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz jsize word1Length = env->GetArrayLength(word1); int word1CodePoints[word1Length]; env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); - dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints, - word1Length); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length); } // Returns how many language model params are processed. @@ -449,7 +452,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j // Use 1 for count to indicate the word has inputted. 
const UnigramProperty unigramProperty(isNotAWord, isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); - dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty); + dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty); if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); const std::vector bigramTargetCodePoints( @@ -457,7 +460,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j // Use 1 for count to indicate the bigram has inputted. const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability, timestamp, 0 /* level */, 1 /* count */); - dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty); + const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, + false /* isBeginningOfSentence */); + dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); } if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { return i + 1; @@ -541,7 +546,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength, + if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength, wordProperty.getUnigramProperty())) { LogUtils::logToJava(env, "Cannot add unigram to the new dict."); return false; @@ -561,8 +566,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } + const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength, + false /* isBeginningOfSentence */); for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { - if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength, + if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, &bigramProperty)) { LogUtils::logToJava(env, "Cannot add bigram to the new dict."); 
return false; diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index e69d2c46b..ef03d2b6d 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -203,12 +203,12 @@ class DicNode { return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1; } - // Used to get n-gram probability in DicNodeUtils + // Used to get n-gram probability in DicNodeUtils. int getPtNodePos() const { return mDicNodeProperties.getPtNodePos(); } - // Used to get n-gram probability in DicNodeUtils + // Used to get n-gram probability in DicNodeUtils. n is 1-indexed. int getNthPrevWordTerminalPtNodePos(const int n) const { if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index c860d82af..fe3167a61 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -74,28 +74,28 @@ int Dictionary::getProbability(const int *word, int length) const { return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos); } -int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, - int length1) const { +int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word, + int length) const { TimeKeeper::setCurrentTime(); - return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1); + return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length); } -void Dictionary::addUnigramWord(const int *const word, const int length, +void Dictionary::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty); + 
mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty); } -void Dictionary::addBigramWords(const int *const word0, const int length0, +void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty); + mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty); } -void Dictionary::removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) { +void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) { TimeKeeper::setCurrentTime(); - mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1); + mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length); } void Dictionary::flush(const char *const filePath) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index b63c61fbb..817d9f7fc 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -73,16 +73,16 @@ class Dictionary { int getProbability(const int *word, int length) const; int getBigramProbability(const PrevWordsInfo *const prevWordsInfo, - const int *word1, int length1) const; + const int *word, int length) const; - void addUnigramWord(const int *const codePoints, const int codePointCount, + void addUnigramEntry(const int *const codePoints, const int codePointCount, const UnigramProperty *const unigramProperty); - void addBigramWords(const int *const word0, const int length0, + void addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - void removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1); + 
void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length); void flush(const char *const filePath); diff --git a/native/jni/src/suggest/core/dictionary/property/bigram_property.h b/native/jni/src/suggest/core/dictionary/property/bigram_property.h index 8d3429b5b..343af143c 100644 --- a/native/jni/src/suggest/core/dictionary/property/bigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/bigram_property.h @@ -23,6 +23,7 @@ namespace latinime { +// TODO: Change to NgramProperty. class BigramProperty { public: BigramProperty(const std::vector *const targetCodePoints, diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index ce5a49f83..3fd815f98 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -29,6 +29,7 @@ class DicNodeVector; class DictionaryBigramsStructurePolicy; class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; +class PrevWordsInfo; class UnigramProperty; /* @@ -69,16 +70,16 @@ class DictionaryStructureWithBufferPolicy { virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; // Returns whether the update was success or not. - virtual bool addUnigramWord(const int *const word, const int length, + virtual bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) = 0; // Returns whether the update was success or not. - virtual bool addBigramWords(const int *const word0, const int length0, + virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) = 0; // Returns whether the update was success or not. 
- virtual bool removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) = 0; + virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) = 0; virtual void flush(const char *const filePath) = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index dc2b66a2c..f1e411f38 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -36,7 +36,7 @@ void DicTraverseSession::init(const Dictionary *const dictionary, ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; prevWordsInfo->getPrevWordsTerminalPtNodePos( - getDictionaryStructurePolicy(), mPrevWordsPtNodePos); + getDictionaryStructurePolicy(), mPrevWordsPtNodePos, true /* tryLowerCaseSearch */); } void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 70a99ef38..e4de1f4cc 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -41,13 +41,23 @@ class PrevWordsInfo { mIsBeginningOfSentence[0] = isBeginningOfSentence; } + bool isValid() const { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + if (mPrevWordCodePointCount[i] > MAX_WORD_LENGTH) { + return false; + } + } + return true; + } + void getPrevWordsTerminalPtNodePos( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, - int *const outPrevWordsTerminalPtNodePos) const { + int *const outPrevWordsTerminalPtNodePos, + const bool tryLowerCaseSearch) const { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, mPrevWordCodePoints[i], 
mPrevWordCodePointCount[i], - mIsBeginningOfSentence[i]); + mIsBeginningOfSentence[i], tryLowerCaseSearch); } } @@ -66,19 +76,37 @@ class PrevWordsInfo { dictStructurePolicy->getBigramsStructurePolicy(), pos); } + // n is 1-indexed. + const int *getNthPrevWordCodePoints(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return nullptr; + } + return mPrevWordCodePoints[n - 1]; + } + + // n is 1-indexed. + int getNthPrevWordCodePointCount(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return 0; + } + return mPrevWordCodePointCount[n - 1]; + } + private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); static int getTerminalPtNodePosOfWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, - const bool isBeginningOfSentence) { + const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { if (!dictStructurePolicy || !wordCodePoints) { return NOT_A_DICT_POS; } const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); - if (wordPtNodePos != NOT_A_DICT_POS) { + if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) { + // Return the position when the word was found or when lower case search is not + // requested. return wordPtNodePos; } // Check bigrams for lower-cased previous word if original was not found. 
Useful for diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp index dde1af299..97e1120a3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp @@ -31,6 +31,7 @@ #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -163,10 +164,10 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons ptNodeParams.getTerminalId()); } -bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, +bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -218,10 +219,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len } } -bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { + const int length0 = 
prevWordsInfo->getNthPrevWordCodePointCount(1); + const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -257,8 +260,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le } } -bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, const int length1) { + const int length0 = prevWordsInfo->getNthPrevWordCodePointCount(1); + const int *word0 = prevWordsInfo->getNthPrevWordCodePoints(1); if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h index 2f8ad539c..95813881d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h @@ -108,14 +108,14 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutPolicy; } - bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - bool removeBigramWords(const int *const word0, const int length0, const int *const 
word1, - const int length1); + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length); void flush(const char *const filePath); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 59f1f29e9..f93d2894c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -145,7 +145,8 @@ templateisValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s", - path); + dictPath); ASSERT(false); return nullptr; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 54d1e0f6d..6240d46aa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -81,24 +81,24 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutListPolicy; } - bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { // This method should not be called for non-updatable dictionary. 
- AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, - const int length1) { + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, + const int length) { // This method should not be called for non-updatable dictionary. - AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 7da9e3072..439e90e44 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -23,6 +23,7 @@ #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" +#include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" @@ -155,10 +156,10 @@ int 
Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons ptNodeParams.getTerminalId()); } -bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, +bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -210,10 +211,10 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len } } -bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, +bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -221,15 +222,20 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH - || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. 
" - "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size()); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, - false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + AKLOGE("The word is too long to insert the ngram to the dictionary. " + "length: %zu", bigramProperty->getTargetCodePoints()->size()); + return false; + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return false; } const int word1Pos = getTerminalPtNodePositionOfWord( @@ -239,7 +245,8 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le return false; } bool addedNewBigram = false; - if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) { + if (mUpdatingHelper.addBigramWords(prevWordsPtNodePos[0], word1Pos, bigramProperty, + &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; } @@ -249,10 +256,10 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le } } -bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, - const int *const word1, const int length1) { +bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, + const int *const word, const int length) { if (!mBuffers->isUpdatable()) { - AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); return false; } if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { @@ -260,22 +267,28 @@ bool 
Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int mDictBuffer->getTailPosition()); return false; } - if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) { - AKLOGE("Either src word or target word is too long to remove the bigram to from the " - "dictionary. length0: %d, length1: %d", length0, length1); + if (!prevWordsInfo->isValid()) { + AKLOGE("prev words info is not valid for removing n-gram entry from the dictionary."); return false; } - const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + if (length > MAX_WORD_LENGTH) { + AKLOGE("word is too long to remove n-gram entry from the dictionary. length: %d", length); + // Reject over-long words as addNgramEntry() does instead of looking them up. + return false; + } + int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos, + false /* tryLowerCaseSearch */); + // TODO: Support N-gram. + if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) { + return false; + } + const int wordPos = getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); - if (word0Pos == NOT_A_DICT_POS) { + if (wordPos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, - false /* forceLowerCaseSearch */); - if (word1Pos == NOT_A_DICT_POS) { - return false; - } - if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + if (mUpdatingHelper.removeBigramWords(prevWordsPtNodePos[0], wordPos)) { mBigramCount--; return true; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index b78576484..008f2e423 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -90,13 +90,13 @@ class 
Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return &mShortcutPolicy; } 
- bool addUnigramWord(const int *const word, const int length, + bool addUnigramEntry(const int *const word, const int length, const UnigramProperty *const unigramProperty); - bool addBigramWords(const int *const word0, const int length0, + bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, const BigramProperty *const bigramProperty); - bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word1, const int length1); void flush(const char *const filePath);