From 67c855ea6f882190d73df9d3fae0b56929fd6888 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 4 Oct 2013 16:11:11 +0900 Subject: [PATCH] Check the elapsed time for decaying. Bug: 6669677 Change-Id: I0401a4c0908c702ce65abfa5e017dd16cb1296f9 --- .../dictionary_header_structure_policy.h | 2 + .../bigram/dynamic_bigram_list_policy.cpp | 4 +- .../bigram/dynamic_bigram_list_policy.h | 10 +++- ...namic_patricia_trie_gc_event_listeners.cpp | 4 +- ...dynamic_patricia_trie_gc_event_listeners.h | 14 +++-- .../dynamic_patricia_trie_policy.cpp | 32 +++-------- .../dictionary/dynamic_patricia_trie_policy.h | 5 +- .../dynamic_patricia_trie_writing_helper.cpp | 14 +++-- .../dynamic_patricia_trie_writing_helper.h | 5 +- .../utils/forgetting_curve_utils.cpp | 55 +++++++++++++++---- .../dictionary/utils/forgetting_curve_utils.h | 23 +++++++- .../latin/BinaryDictionaryDecayingTests.java | 4 +- 12 files changed, 113 insertions(+), 59 deletions(-) diff --git a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h index a6829b476..5492c6070 100644 --- a/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_header_structure_policy.h @@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy { virtual float getMultiWordCostMultiplier() const = 0; + virtual int getLastDecayedTime() const = 0; + virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 8753c6eb0..b1170e251 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -360,13 +360,13 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( } bool DynamicBigramListPolicy::updateProbabilityForDecay( - BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, + const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const { *outRemoved = false; if (mIsDecayingDict) { // Update bigram probability for decaying. const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( - BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); + BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy); if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { // Write new probability. const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index b358b4ed5..0504b59d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -27,6 +27,7 @@ namespace latinime { class BufferWithExtendableBuffer; +class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; /* @@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { public: - DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, + DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy, + BufferWithExtendableBuffer *const buffer, const DictionaryShortcutsStructurePolicy *const shortcutPolicy, const bool isDecayingDict) - : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {} + : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy), + mIsDecayingDict(isDecayingDict) {} ~DynamicBigramListPolicy() {} @@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT; static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT; + const DictionaryHeaderStructurePolicy *const mHeaderPolicy; BufferWithExtendableBuffer *const mBuffer; const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; const bool mIsDecayingDict; @@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { // Follow bigram link and return the position of bigram target PtNode that is currently valid. int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; - bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags, + bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 324b53062..a17a0acf6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -29,7 +30,8 @@ bool DynamicPatriciaTrieGcEventListeners bool isUselessPtNode = !node->isTerminal(); if (node->isTerminal() && mIsDecayingDict) { const int newProbability = - ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); + ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(), + mHeaderPolicy); int writingPos = node->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 463715af5..3ca2f2a01 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -29,6 +29,8 @@ namespace latinime { +class DictionaryHeaderStructurePolicy; + class DynamicPatriciaTrieGcEventListeners { public: // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or @@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( + const DictionaryHeaderStructurePolicy *const headerPolicy, DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) - : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict), - mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} + : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer), + mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0), + mValidUnigramCount(0) {} ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; @@ -72,9 +76,10 @@ class DynamicPatriciaTrieGcEventListeners { DISALLOW_IMPLICIT_CONSTRUCTORS( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); + const DictionaryHeaderStructurePolicy *const mHeaderPolicy; DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; - const int mIsDecayingDict; + const bool mIsDecayingDict; std::vector mValueStack; int mChildrenValue; int mValidUnigramCount; @@ -85,7 +90,8 @@ class DynamicPatriciaTrieGcEventListeners { class TraversePolicyToUpdateBigramProbability : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) + TraversePolicyToUpdateBigramProbability( + DynamicBigramListPolicy *const bigramPolicy) : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {} bool onAscend() { return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 60d0db0c0..31e3fb42f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -42,7 +42,6 @@ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUER const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { @@ -314,15 +313,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } - const bool runGCwithDecay = needsToDecay(); - DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer, - &mShortcutListPolicy, runGCwithDecay); + const bool needsToDecay = mHeaderPolicy.isDecayingDict() + && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( + false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); + DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, + &mShortcutListPolicy, needsToDecay); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay); + &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); - if (runGCwithDecay) { - mNeedsToDecayForTesting = false; - } + mNeedsToDecayForTesting = false; } bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { @@ -344,16 +343,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. return true; } else if (mHeaderPolicy.isDecayingDict()) { - if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { - // Unigram count exceeds the limit. - return true; - } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) { - // Bigram count exceeds the limit. - return true; - } else if (mindsBlockByGC && needsToDecay()) { - // Time to update probabilities for decaying. - return true; - } + return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( + mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy); } return false; } @@ -369,9 +360,4 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const } } -bool DynamicPatriciaTriePolicy::needsToDecay() const { - return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting - || mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0)); -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index c3bbe9977..903f65e8e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), - mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, + mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} @@ -105,7 +105,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; - static const int DECAY_INTERVAL_FOR_DECAYING_DICTS; const MmappedBuffer *const mBuffer; const HeaderPolicy mHeaderPolicy; @@ -115,8 +114,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mUnigramCount; int mBigramCount; int mNeedsToDecayForTesting; - - bool needsToDecay() const; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 70a9ee564..067c8ec98 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod MAX_DICTIONARY_SIZE); int unigramCount = 0; int bigramCount = 0; - if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) { + if (mNeedsToDecay) { + ForgettingCurveUtils::sTimeKeeper.setCurrentTime(); + } + if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { return; } BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); @@ -481,14 +484,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( } bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, - BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, - int *const outBigramCount) { + const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, + int *const outUnigramCount, int *const outBigramCount) { DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - this, mBuffer, mNeedsToDecay); + headerPolicy, this, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; @@ -505,7 +508,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, &traversePolicyToUpdateBigramProbability)) { return false; } - if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { // TODO: Remove more bigrams. @@ -524,7 +526,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Create policy instance for the GCed dictionary. DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); - DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, + DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, mNeedsToDecay); // Create reading helper for the GCed dictionary. DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index 0caf29120..ca8664729 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper { const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); - bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite, - int *const outUnigramCount, int *const outBigramCount); + bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, + BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, + int *const outBigramCount); int getUpdatedProbability(const int originalProbability, const int newProbability); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index b502fe25d..19ca35481 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -15,10 +15,12 @@ */ #include +#include #include #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { @@ -35,8 +37,14 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; // Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected // duration of the decay is approximately 66hours. const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; +const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; +ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper; + +void ForgettingCurveUtils::TimeKeeper::setCurrentTime() { + mCurrentTime = time(0); +} /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, const int encodedBigramProbability) { @@ -76,19 +84,44 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; } -/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { - const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); +/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability, + const DictionaryHeaderStructurePolicy *const headerPolicy) { + const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime(); + const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1); + int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); // TODO: Implement the decay in more proper way. - const float currentRate = static_cast(currentEncodedProbability) - / static_cast(MAX_ENCODED_PROBABILITY); - const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY - + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); - const float randValue = static_cast(rand()) / static_cast(RAND_MAX); - if (thresholdToDecay < randValue) { - return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); - } else { - return currentEncodedProbability; + for (int i = 0; i < decayIterationCount; ++i) { + const float currentRate = static_cast(currentEncodedProbability) + / static_cast(MAX_ENCODED_PROBABILITY); + const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY + + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate; + const float randValue = static_cast(rand()) / static_cast(RAND_MAX); + if (thresholdToDecay < randValue) { + currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, + 0); + } } + return currentEncodedProbability; +} + +/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, + const int unigramCount, const int bigramCount, + const DictionaryHeaderStructurePolicy *const headerPolicy) { + if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { + // Unigram count exceeds the limit. + return true; + } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) { + // Bigram count exceeds the limit. + return true; + } + if (mindsBlockByDecay) { + return false; + } + if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) { + // Time to decay. + return true; + } + return false; } /* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index d666f22aa..2ad423874 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -23,16 +23,32 @@ namespace latinime { +class DictionaryHeaderStructurePolicy; + // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. class ForgettingCurveUtils { public: + class TimeKeeper { + public: + TimeKeeper() : mCurrentTime(0) {} + void setCurrentTime(); + int peekCurrentTime() const { return mCurrentTime; }; + + private: + DISALLOW_COPY_AND_ASSIGN(TimeKeeper); + + int mCurrentTime; + }; + static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT_AFTER_GC; static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT_AFTER_GC; + static TimeKeeper sTimeKeeper; + static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbability); @@ -41,7 +57,11 @@ class ForgettingCurveUtils { static int isValidEncodedProbability(const int encodedProbability); - static int getEncodedProbabilityToSave(const int encodedProbability); + static int getEncodedProbabilityToSave(const int encodedProbability, + const DictionaryHeaderStructurePolicy *const headerPolicy); + + static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, + const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); @@ -68,6 +88,7 @@ class ForgettingCurveUtils { static const int MIN_VALID_ENCODED_PROBABILITY; static const int ENCODED_PROBABILITY_STEP; static const float MIN_PROBABILITY_TO_DECAY; + static const int DECAY_INTERVAL_SECONDS; static const ProbabilityTable sProbabilityTable; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index b2d31c21f..ded8eaa97 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -50,8 +50,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { - // Entries having low probability would be suppressed once in 2 GCs. - final int count = 2; + // Entries having low probability would be suppressed once in 3 GCs. + final int count = 3; for (int i = 0; i < count; i++) { binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); binaryDictionary.flushWithGC();