Merge "Implement updateCounter() by using existing entry adding methods."

2014-10-02 02:09:20 +00:00 · 2014-10-02 02:09:20 +00:00 · 0afad267c5
commit 0afad267c5
parent 5a4e1c5a67 29777e3a8a
13 changed files with 129 additions and 21 deletions
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@ -374,7 +374,7 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
    // Use 1 for count to indicate the word has been input.
    const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
            isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
-            &shortcuts);
+            std::move(shortcuts));
    return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
            &unigramProperty);
 }
@ -434,10 +434,16 @@ static bool latinime_BinaryDictionary_updateCounter(JNIEnv *env, jclass clazz, j
    if (!dictionary) {
        return false;
    }
-    jsize wordLength = env->GetArrayLength(word);
-    int wordCodePoints[wordLength];
-    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
-    return false;
+    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
+    jsize codePointCount = env->GetArrayLength(word);
+    int wordCodePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
+    const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
+    return dictionary->updateCounter(&prevWordsInfo,
+            CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
+            historicalInfo);
 }

 // Returns how many language model params are processed.
@ -509,7 +515,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
        // Use 1 for count to indicate the word has been input.
        const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
                isBlacklisted, unigramProbability,
-                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
+                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
        dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
                &unigramProperty);
        if (word0) {
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
 }

+// Refreshes TimeKeeper's current time, then hands the counter update to the structure policy and
+// returns the policy's result.
+bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView codePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    TimeKeeper::setCurrentTime();
+    const bool isUpdated = mDictionaryStructureWithBufferPolicy->updateCounter(
+            prevWordsInfo, codePoints, isValidWord, historicalInfo);
+    return isUpdated;
+}
+
 bool Dictionary::flush(const char *const filePath) {
    TimeKeeper::setCurrentTime();
    return mDictionaryStructureWithBufferPolicy->flush(filePath);
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@ -22,6 +22,7 @@
 #include "defines.h"
 #include "jni.h"
 #include "suggest/core/dictionary/ngram_listener.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@ -90,6 +91,10 @@ class Dictionary {
    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const CodePointArrayView codePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView codePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);  // Delegates to the structure policy.
+
    bool flush(const char *const filePath);

    bool flushWithGC(const char *const filePath);
--- a/native/jni/src/suggest/core/dictionary/property/ngram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h
@ -27,7 +27,7 @@ namespace latinime {
 class NgramProperty {
 public:
    NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
-            const HistoricalInfo &historicalInfo)
+            const HistoricalInfo historicalInfo)  // NOTE(review): targetCodePoints is const &&, so std::move() below yields a const rvalue; mTargetCodePoints is presumably copied, not moved - confirm.
            : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
              mHistoricalInfo(historicalInfo) {}

--- a/native/jni/src/suggest/core/dictionary/property/unigram_property.h
+++ b/native/jni/src/suggest/core/dictionary/property/unigram_property.h
@ -54,11 +54,18 @@ class UnigramProperty {
              mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}

    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
-            const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
-            const std::vector<ShortcutProperty> *const shortcuts)
+            const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
+            const std::vector<ShortcutProperty> &&shortcuts)
            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
-              mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}
+              mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}  // NOTE(review): shortcuts is const &&, so std::move() yields a const rvalue and mShortcuts is presumably copy-constructed - confirm intent.
+
+    // Without shortcuts: mShortcuts is default-constructed.
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+              mHistoricalInfo(historicalInfo), mShortcuts() {}

    bool representsBeginningOfSentence() const {
        return mRepresentsBeginningOfSentence;
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@ -21,6 +21,7 @@

 #include "defines.h"
 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/dictionary/word_attributes.h"
 #include "utils/int_array_view.h"
@ -87,6 +88,11 @@ class DictionaryStructureWithBufferPolicy {
    virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const CodePointArrayView wordCodePoints) = 0;

+    // Returns whether the update succeeded.
+    virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo) = 0;
+
    // Returns whether the flush succeeded.
    virtual bool flush(const char *const filePath) = 0;

--- a/native/jni/src/suggest/core/session/prev_words_info.h
+++ b/native/jni/src/suggest/core/session/prev_words_info.h
@ -33,7 +33,7 @@ class PrevWordsInfo {
        clear();
    }

-    PrevWordsInfo(PrevWordsInfo &&prevWordsInfo)
+    PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
            : mPrevWordCount(prevWordsInfo.mPrevWordCount) {
        for (size_t i = 0; i < mPrevWordCount; ++i) {
            mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
@ -73,6 +73,16 @@ class PrevWordsInfo {
        mIsBeginningOfSentence[0] = isBeginningOfSentence;
    }

+    size_t getPrevWordCount() const {  // Returns the stored previous-word count, mPrevWordCount.
+        return mPrevWordCount;
+    }
+
+    // TODO: Remove.
+    // Builds a PrevWordsInfo from this object's arrays whose word count is capped at
+    // maxPrevWordCount (never more than the count this object already holds).
+    const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
+        const size_t trimmedCount = std::min(mPrevWordCount, maxPrevWordCount);
+        return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
+                trimmedCount);
+    }
+
    bool isValid() const {
        if (mPrevWordCodePointCount[0] > 0) {
            return true;
@ -112,7 +122,7 @@ class PrevWordsInfo {
    }

 private:
-    DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+    DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);

    static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            const int *const wordCodePoints, const int wordCodePointCount,
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;  // Used by updateCounter() when isValidWord is true.

 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
        DicNodeVector *const childDicNodes) const {
@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
    }
    if (prevWordIds[0] == NOT_A_WORD_ID) {
        if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
-            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
            const UnigramProperty beginningOfSentenceUnigramProperty(
                    true /* representsBeginningOfSentence */, true /* isNotAWord */,
-                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                    HistoricalInfo(), &shortcuts);
+                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                    &beginningOfSentenceUnigramProperty)) {
                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
    }
 }

+// Re-adds the unigram entry for the word, then adds the n-gram entry for (prevWordsInfo, word).
+// Returns false if the dictionary is not updatable or if either add operation fails.
+bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView wordCodePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+    // Valid words get a placeholder probability; other words are stored with NOT_A_PROBABILITY.
+    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
+        AKLOGE("Cannot update unigram entry in updateCounter().");
+        return false;
+    }
+    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
+    if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
+        // Report the n-gram failure as such so it can be told apart from the unigram failure.
+        AKLOGE("Cannot update ngram entry in updateCounter().");
+        return false;
+    }
+    return true;
+}
+
 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@ -551,7 +573,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
    }
    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            *historicalInfo, &shortcuts);
+            *historicalInfo, std::move(shortcuts));
    return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h
@ -118,6 +118,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const CodePointArrayView wordCodePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);  // Implements the base policy's updateCounter().
+
    bool flush(const char *const filePath);

    bool flushWithGC(const char *const filePath);
@ -147,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    // prevent the dictionary from overflowing.
    static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
    static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+    static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;

    const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
    const HeaderPolicy *const mHeaderPolicy;
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
    }
    const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
            ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            HistoricalInfo(), &shortcuts);
+            HistoricalInfo(), std::move(shortcuts));
    return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h
@ -107,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
        return false;
    }

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo) {
+        // This method should not be called for a non-updatable dictionary: log a warning and
+        // report failure without using any of the arguments.
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+
    bool flush(const char *const filePath) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
        Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;  // Used by updateCounter() when isValidWord is true.

 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
        DicNodeVector *const childDicNodes) const {
@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
        if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
            return false;
        }
-        const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
        const UnigramProperty beginningOfSentenceUnigramProperty(
                true /* representsBeginningOfSentence */, true /* isNotAWord */,
-                false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                HistoricalInfo(), &shortcuts);
+                false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
        if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                &beginningOfSentenceUnigramProperty)) {
            AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -364,6 +363,32 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
    }
 }

+// Re-adds the unigram entry for the word, then adds one n-gram entry per context length from 1 up
+// to prevWordsInfo->getPrevWordCount(). Returns false if the dictionary is not updatable or if
+// any add operation fails.
+bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView wordCodePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+    // TODO: Have count up method in language model dict content.
+    // Valid words get a placeholder probability; other words are stored with NOT_A_PROBABILITY.
+    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
+        AKLOGE("Cannot update unigram entry in updateCounter().");
+        return false;
+    }
+    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
+    // Add the n-gram once per context length i, with the context trimmed to at most i words.
+    for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
+        const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
+        if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
+            AKLOGE("Cannot update ngram entry in updateCounter().");
+            return false;
+        }
+    }
+    return true;
+}
+
 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@ -486,7 +511,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
    }
    const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
            probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
-            probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
+            probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
    return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@ -98,6 +98,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const CodePointArrayView wordCodePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);  // Implements the base policy's updateCounter().
+
    bool flush(const char *const filePath);

    bool flushWithGC(const char *const filePath);
@ -127,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    // prevent the dictionary from overflowing.
    static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
    static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+    // TODO: Remove
+    static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;

    const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
    const HeaderPolicy *const mHeaderPolicy;