diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 1e6baa5ec..365217a60 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -374,7 +374,7 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, // Use 1 for count to indicate the word has inputted. const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), - &shortcuts); + std::move(shortcuts)); return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount), &unigramProperty); } @@ -434,10 +434,16 @@ static bool latinime_BinaryDictionary_updateCounter(JNIEnv *env, jclass clazz, j if (!dictionary) { return false; } - jsize wordLength = env->GetArrayLength(word); - int wordCodePoints[wordLength]; - env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); - return false; + const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, + prevWordCodePointArrays, isBeginningOfSentenceArray, + env->GetArrayLength(prevWordCodePointArrays)); + jsize codePointCount = env->GetArrayLength(word); + int wordCodePoints[codePointCount]; + env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints); + const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count); + return dictionary->updateCounter(&prevWordsInfo, + CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE, + historicalInfo); } // Returns how many language model params are processed. @@ -509,7 +515,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j // Use 1 for count to indicate the word has inputted. 
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, unigramProbability, - HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts); + HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts)); dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length), &unigramProperty); if (word0) { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index c3f422916..8d3f8a9f8 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints); } +bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView codePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + TimeKeeper::setCurrentTime(); + return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints, + isValidWord, historicalInfo); +} + bool Dictionary::flush(const char *const filePath) { TimeKeeper::setCurrentTime(); return mDictionaryStructureWithBufferPolicy->flush(filePath); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 09f8eaceb..a58dbfbd7 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -22,6 +22,7 @@ #include "defines.h" #include "jni.h" #include "suggest/core/dictionary/ngram_listener.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -90,6 +91,10 @@ class 
Dictionary { bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView codePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView codePoints, const bool isValidWord, + const HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); diff --git a/native/jni/src/suggest/core/dictionary/property/ngram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h index 49f683bdc..dce460099 100644 --- a/native/jni/src/suggest/core/dictionary/property/ngram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h @@ -27,7 +27,7 @@ namespace latinime { class NgramProperty { public: NgramProperty(const std::vector &&targetCodePoints, const int probability, - const HistoricalInfo &historicalInfo) + const HistoricalInfo historicalInfo) : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), mHistoricalInfo(historicalInfo) {} diff --git a/native/jni/src/suggest/core/dictionary/property/unigram_property.h b/native/jni/src/suggest/core/dictionary/property/unigram_property.h index 4c61f96e6..d1f0ab4ca 100644 --- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h @@ -54,11 +54,18 @@ class UnigramProperty { mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {} UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, - const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo, - const std::vector *const shortcuts) + const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo, + const std::vector &&shortcuts) : mRepresentsBeginningOfSentence(representsBeginningOfSentence), mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), - mHistoricalInfo(historicalInfo), 
mShortcuts(*shortcuts) {} + mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {} + + // Without shortcuts. + UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord, + const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo) + : mRepresentsBeginningOfSentence(representsBeginningOfSentence), + mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability), + mHistoricalInfo(historicalInfo), mShortcuts() {} bool representsBeginningOfSentence() const { return mRepresentsBeginningOfSentence; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index f4b97989f..6624b7921 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" +#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/word_attributes.h" #include "utils/int_array_view.h" @@ -87,6 +88,11 @@ class DictionaryStructureWithBufferPolicy { virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints) = 0; + // Returns whether the update was success or not. + virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) = 0; + // Returns whether the flush was success or not. 
virtual bool flush(const char *const filePath) = 0; diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index 02e82a8e0..553d5ad07 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -33,7 +33,7 @@ class PrevWordsInfo { clear(); } - PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) + PrevWordsInfo(const PrevWordsInfo &prevWordsInfo) : mPrevWordCount(prevWordsInfo.mPrevWordCount) { for (size_t i = 0; i < mPrevWordCount; ++i) { mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i]; @@ -73,6 +73,16 @@ class PrevWordsInfo { mIsBeginningOfSentence[0] = isBeginningOfSentence; } + size_t getPrevWordCount() const { + return mPrevWordCount; + } + + // TODO: Remove. + const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const { + return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence, + std::min(mPrevWordCount, maxPrevWordCount)); + } + bool isValid() const { if (mPrevWordCodePointCount[0] > 0) { return true; @@ -112,7 +122,7 @@ class PrevWordsInfo { } private: - DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); + DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo); static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 40e393c6c..11f7b305f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C const int 
Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; +const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI } if (prevWordIds[0] == NOT_A_WORD_ID) { if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { - const std::vector shortcuts; const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, - false /* isBlacklisted */, MAX_PROBABILITY /* probability */, - HistoricalInfo(), &shortcuts); + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); @@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } + +bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + const int probability = isValidWord ? 
DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; + const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, + false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); + if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { + AKLOGE("Cannot update unigarm entry in updateCounter()."); + return false; + } + const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); + if (!addNgramEntry(prevWordsInfo, &ngramProperty)) { + AKLOGE("Cannot update ngram entry in updateCounter()."); + return false; + } + return true; +} + bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); @@ -551,7 +573,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), - *historicalInfo, &shortcuts); + *historicalInfo, std::move(shortcuts)); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 324a53e62..995d7764f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -118,6 +118,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const 
HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); @@ -147,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // prevent the dictionary from overflowing. static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy *const mHeaderPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 7800758c9..d3d684bfa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), - HistoricalInfo(), &shortcuts); + HistoricalInfo(), std::move(shortcuts)); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index a912d03be..32a95bb6c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -107,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + // This method 
should not be called for non-updatable dictionary. + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + bool flush(const char *const filePath) { // This method should not be called for non-updatable dictionary. AKLOGI("Warning: flush() is called for non-updatable dictionary."); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0badabf03..41b109f95 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; +const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1; void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) { return false; } - const std::vector shortcuts; const UnigramProperty beginningOfSentenceUnigramProperty( true /* representsBeginningOfSentence */, true /* isNotAWord */, - false /* isBlacklisted */, MAX_PROBABILITY /* probability */, - HistoricalInfo(), &shortcuts); + false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo()); if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */), &beginningOfSentenceUnigramProperty)) { AKLOGE("Cannot add unigram entry for the beginning-of-sentence."); @@ -364,6 +363,32 
@@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor } } +bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: updateCounter() is called for non-updatable dictionary."); + return false; + } + // TODO: Have count up method in language model dict content. + const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY; + const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */, + false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo); + if (!addUnigramEntry(wordCodePoints, &unigramProperty)) { + AKLOGE("Cannot update unigarm entry in updateCounter()."); + return false; + } + const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo); + for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) { + const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i)); + if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) { + AKLOGE("Cannot update ngram entry in updateCounter()."); + return false; + } + } + return true; +} + bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: flush() is called for non-updatable dictionary. 
filePath: %s", filePath); @@ -486,7 +511,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( } const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(), probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(), - probabilityEntry.getProbability(), *historicalInfo, &shortcuts); + probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts)); return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 598122bf2..662bb8d4b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -98,6 +98,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints); + bool updateCounter(const PrevWordsInfo *const prevWordsInfo, + const CodePointArrayView wordCodePoints, const bool isValidWord, + const HistoricalInfo historicalInfo); + bool flush(const char *const filePath); bool flushWithGC(const char *const filePath); @@ -127,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // prevent the dictionary from overflowing. static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + // TODO: Remove + static const int DUMMY_PROBABILITY_FOR_VALID_WORDS; const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy *const mHeaderPolicy;