From 5128935ac4d7961e3c863270b828e47a79b97235 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 28 Feb 2014 21:06:03 +0900 Subject: [PATCH] Add header attributes for evaluation. Bug: 13197276 Change-Id: Ib5247da691ff24a73e13485288237ccc51bb54f0 --- .../latin/ExpandableBinaryDictionary.java | 7 +++ .../latin/makedict/DictionaryHeader.java | 4 ++ .../bigram/ver4_bigram_list_policy.cpp | 2 +- .../dictionary/header/header_policy.cpp | 11 +++++ .../dictionary/header/header_policy.h | 42 ++++++++++++++-- .../v4/ver4_patricia_trie_node_writer.cpp | 2 +- .../v4/ver4_patricia_trie_policy.cpp | 8 +++- .../v4/ver4_patricia_trie_writing_helper.cpp | 18 ++++--- .../utils/forgetting_curve_utils.cpp | 48 ++++++++++--------- .../dictionary/utils/forgetting_curve_utils.h | 25 ++++++---- 10 files changed, 119 insertions(+), 48 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index b18951500..26545acbd 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -64,6 +64,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100; private static final int TIMEOUT_FOR_READ_OPS_FOR_TESTS_IN_MILLISECONDS = 10000; + private static final int DEFAULT_MAX_UNIGRAM_COUNT = 10000; + private static final int DEFAULT_MAX_BIGRAM_COUNT = 10000; + /** * The maximum length of a word in this dictionary. */ @@ -207,6 +210,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, + String.valueOf(DEFAULT_MAX_UNIGRAM_COUNT)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, + String.valueOf(DEFAULT_MAX_BIGRAM_COUNT)); return attributeMap; } diff --git a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java index ed9c39602..b32eb9195 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java @@ -42,6 +42,10 @@ public final class DictionaryHeader { "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP"; public static final String FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; + public static final String FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY = + "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS"; + public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; + public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; public static final String ATTRIBUTE_VALUE_TRUE = "1"; public DictionaryHeader(final int headerSize, final DictionaryOptions dictionaryOptions, diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 7f916677a..5df2096a4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -162,7 +162,7 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i } else if (bigramEntry.hasHistoricalInfo()) { const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( bigramEntry.getHistoricalInfo(), mHeaderPolicy); - if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) { + if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) { const BigramEntry updatedBigramEntry = bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 2ac417b33..7c7b05ca8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -37,11 +37,22 @@ const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY = "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP"; const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; +const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY = + "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS"; + +const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; +const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 4; const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 0; +// 4 days +const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS = + 4 * 24 * 60 * 60; + +const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000; +const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000; // Used for logging. Question mark is used to indicate that the key is not found. void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 8fa7e168c..66824245e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -58,7 +58,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, - DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)) {} + DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), + mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, + DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), + mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), + mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} // Constructs header information using an attribute map. HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, @@ -83,7 +90,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, - DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)) {} + DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), + mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, + DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), + mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), + mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( + &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} // Temporary dummy header. HeaderPolicy() @@ -92,7 +106,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), - mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0) {} + mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), + mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {} ~HeaderPolicy() {} @@ -179,6 +194,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mForgettingCurveProbabilityValuesTableId; } + AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const { + return mForgettingCurveDurationToLevelDown; + } + + AK_FORCE_INLINE int getMaxUnigramCount() const { + return mMaxUnigramCount; + } + + AK_FORCE_INLINE int getMaxBigramCount() const { + return mMaxBigramCount; + } + void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; @@ -205,10 +232,16 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const LOCALE_KEY; static const char *const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY; static const char *const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY; + static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY; + static const char *const MAX_UNIGRAM_COUNT_KEY; + static const char *const MAX_BIGRAM_COUNT_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; + static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS; + static const int DEFAULT_MAX_UNIGRAM_COUNT; + static const int DEFAULT_MAX_BIGRAM_COUNT; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; @@ -226,6 +259,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const bool mHasHistoricalInfoOfWords; const int mForgettingCurveOccurrencesToLevelUp; const int mForgettingCurveProbabilityValuesTableId; + const int mForgettingCurveDurationToLevelDown; + const int mMaxUnigramCount; + const int mMaxBigramCount; const std::vector readLocale() const; float readMultipleWordCostMultiplier() const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 13ae9d923..f24c2e1af 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -168,7 +168,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA toBeUpdatedPtNodeParams->getTerminalId()); return false; } - const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo); + const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy); if (!isValid) { if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 197250ff3..4d1b0dadb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -329,11 +329,15 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer snprintf(outResult, maxResultLength, "%d", mBigramCount); } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", - mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT : + mHeaderPolicy->isDecayingDict() ? + ForgettingCurveUtils::getUnigramCountHardLimit( + mHeaderPolicy->getMaxUnigramCount()) : static_cast(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", - mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : + mHeaderPolicy->isDecayingDict() ? + ForgettingCurveUtils::getBigramCountHardLimit( + mHeaderPolicy->getMaxBigramCount()) : static_cast(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 2da295054..3907c84a0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -96,12 +96,11 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, } const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted .getValidUnigramCount(); - if (headerPolicy->isDecayingDict() - && unigramCount > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { - if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, - ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC)) { + const int maxUnigramCount = headerPolicy->getMaxUnigramCount(); + if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) { + if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) { AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount, - ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC); + maxUnigramCount); return false; } } @@ -114,11 +113,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, return false; } const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount(); - if (headerPolicy->isDecayingDict() - && bigramCount > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { - if (!truncateBigrams(ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC)) { - AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, - ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC); + const int maxBigramCount = headerPolicy->getMaxBigramCount(); + if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) { + if (!truncateBigrams(maxBigramCount)) { + AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount); return false; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index 51b0eb23f..35e05d77a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -25,20 +25,17 @@ namespace latinime { -const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000; -const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; -const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; -const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; - const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::MAX_LEVEL = 3; const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; -const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60; const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14; +const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2; +const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2; + const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; // TODO: Revise the logic to decide the initial probability depending on the given probability. @@ -71,7 +68,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT /* static */ int ForgettingCurveUtils::decodeProbability( const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) { - const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp()); + const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(), + headerPolicy->getForgettingCurveDurationToLevelDown()); return sProbabilityTable.getProbability( headerPolicy->getForgettingCurveProbabilityValuesTableId(), historicalInfo->getLevel(), min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT)); @@ -90,10 +88,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT } } -/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) { +/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo, + const HeaderPolicy *const headerPolicy) { return historicalInfo->getLevel() > 0 - || getElapsedTimeStepCount(historicalInfo->getTimeStamp()) - < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; + || getElapsedTimeStepCount(historicalInfo->getTimeStamp(), + headerPolicy->getForgettingCurveDurationToLevelDown()) + < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; } /* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave( @@ -102,7 +102,9 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { return HistoricalInfo(); } - const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp()); + const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown(); + const int elapsedTimeStep = getElapsedTimeStepCount( + originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds); if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) { // No need to update historical info. return *originalHistoricalInfo; @@ -111,18 +113,18 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1); const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ? originalHistoricalInfo->getLevel() : maxLevelDownAmonut; - const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() + - levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS; - return HistoricalInfo(adjustedTimestamp, + const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() + + levelDownAmount * durationToLevelDownInSeconds; + return HistoricalInfo(adjustedTimestampInSeconds, originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */); } /* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) { - if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { + if (unigramCount >= getUnigramCountHardLimit(headerPolicy->getMaxUnigramCount())) { // Unigram count exceeds the limit. return true; - } else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) { + } else if (bigramCount >= getBigramCountHardLimit(headerPolicy->getMaxBigramCount())) { // Bigram count exceeds the limit. return true; } @@ -143,8 +145,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT return unigramProbability; } -/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) { - return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS; +/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp, + const int durationToLevelDownInSeconds) { + const int elapsedTimeInSeconds = TimeKeeper::peekCurrentTime() - timestamp; + const int timeStepDurationInSeconds = + durationToLevelDownInSeconds / (MAX_ELAPSED_TIME_STEP_COUNT + 1); + return elapsedTimeInSeconds / timeStepDurationInSeconds; } const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 4; @@ -172,12 +178,10 @@ ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() { mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY; continue; } - const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS; const float probability = initialProbability * powf(initialProbability / endProbability, - -1.0f * static_cast(elapsedTime) - / static_cast(TIME_STEP_DURATION_IN_SECONDS - * (MAX_ELAPSED_TIME_STEP_COUNT + 1))); + -1.0f * static_cast(timeStepCount) + / static_cast(MAX_ELAPSED_TIME_STEP_COUNT + 1)); mTables[tableId][level][timeStepCount] = min(max(static_cast(probability), 1), MAX_PROBABILITY); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 1a285e573..bb8690939 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -28,11 +28,6 @@ class HeaderPolicy; class ForgettingCurveUtils { public: - static const int MAX_UNIGRAM_COUNT; - static const int MAX_UNIGRAM_COUNT_AFTER_GC; - static const int MAX_BIGRAM_COUNT; - static const int MAX_BIGRAM_COUNT_AFTER_GC; - static const HistoricalInfo createUpdatedHistoricalInfo( const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy); @@ -47,11 +42,22 @@ class ForgettingCurveUtils { static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbability); - static bool needsToKeep(const HistoricalInfo *const historicalInfo); + static bool needsToKeep(const HistoricalInfo *const historicalInfo, + const HeaderPolicy *const headerPolicy); static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy); + AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) { + return static_cast(static_cast(maxUnigramCount) + * UNIGRAM_COUNT_HARD_LIMIT_WEIGHT); + } + + AK_FORCE_INLINE static int getBigramCountHardLimit(const int maxBigramCount) { + return static_cast(static_cast(maxBigramCount) + * BIGRAM_COUNT_HARD_LIMIT_WEIGHT); + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); @@ -88,16 +94,17 @@ class ForgettingCurveUtils { static const int MAX_LEVEL; static const int MIN_VALID_LEVEL; - static const int TIME_STEP_DURATION_IN_SECONDS; static const int MAX_ELAPSED_TIME_STEP_COUNT; static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; - static const int HALF_LIFE_TIME_IN_SECONDS; + + static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT; + static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT; static const ProbabilityTable sProbabilityTable; static int backoff(const int unigramProbability); - static int getElapsedTimeStepCount(const int timestamp); + static int getElapsedTimeStepCount(const int timestamp, const int durationToLevelDown); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */