From 26266bd53b06facd4ef7da3de6e0a52ea82b4482 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 9 Dec 2013 21:02:41 +0900 Subject: [PATCH] Update historical info for GC. Bug: 11073222 Change-Id: I08a61c02f9f5d527897095eee2de395f86050e2d --- .../bigram/ver4_bigram_list_policy.cpp | 10 ++++-- .../v4/ver4_patricia_trie_node_writer.cpp | 11 ++++-- .../utils/forgetting_curve_utils.cpp | 35 ++++++++++++++++++- .../dictionary/utils/forgetting_curve_utils.h | 15 +++++++- 4 files changed, 63 insertions(+), 8 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index ad437b1a5..968bacee6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -153,12 +153,16 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i return false; } } else if (mNeedsToDecayWhenUpdating) { - // TODO: Quit decaying probability during GC. const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave( bigramEntry.getProbability(), mHeaderPolicy); + const HistoricalInfo historicalInfo = + ForgettingCurveUtils::createHistoricalInfoToSave( + bigramEntry.getHistoricalInfo()); + // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo). if (ForgettingCurveUtils::isValidEncodedProbability(probability)) { const BigramEntry updatedBigramEntry = - bigramEntry.updateProbabilityAndGetEntry(probability); + bigramEntry.updateProbabilityAndGetEntry(probability) + .updateHistoricalInfoAndGetEntry(&historicalInfo); if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { return false; } @@ -225,7 +229,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability( originalBigramEntry->getProbability(), newProbability); const HistoricalInfo updatedHistoricalInfo = - ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( + ForgettingCurveUtils::createUpdatedHistoricalInfo( originalBigramEntry->getHistoricalInfo(), newProbability, timestamp); return originalBigramEntry->updateProbabilityAndGetEntry(probability) .updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 867cd7673..07554342f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -151,17 +151,22 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA const ProbabilityEntry originalProbabilityEntry = mBuffers->getProbabilityDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); - // TODO: Use historical info. + // TODO: Remove. const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy()); + const HistoricalInfo historicalInfo = + ForgettingCurveUtils::createHistoricalInfoToSave( + originalProbabilityEntry.getHistoricalInfo()); const ProbabilityEntry probabilityEntry = - originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability); + originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability) + .createEntryWithUpdatedHistoricalInfo(&historicalInfo); if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { AKLOGE("Cannot write updated probability entry. terminalId: %d", toBeUpdatedPtNodeParams->getTerminalId()); return false; } + // TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo). const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability); if (!isValid) { if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { @@ -379,7 +384,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability( originalProbabilityEntry->getProbability(), newProbability); const HistoricalInfo updatedHistoricalInfo = - ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( + ForgettingCurveUtils::createUpdatedHistoricalInfo( originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp); return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability) .createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index c7fb47ef6..466af7256 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -42,10 +42,13 @@ const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::MAX_LEVEL = 3; const int ForgettingCurveUtils::MAX_COUNT = 3; const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; +const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60; +const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; +const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14; const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; -/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( +/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo( const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const int timestamp) { if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { @@ -110,6 +113,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; } +/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) { + return historicalInfo->getLevel() > 0 + || getElapsedTimeStepCount(historicalInfo->getTimeStamp()) + < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; +} + /* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability, const DictionaryHeaderStructurePolicy *const headerPolicy) { const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime(); @@ -129,6 +138,26 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT return currentEncodedProbability; } +/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave( + const HistoricalInfo *const originalHistoricalInfo) { + if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { + return HistoricalInfo(); + } + const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp()); + if (elapsedTimeStep < MAX_ELAPSED_TIME_STEP_COUNT) { + // No need to update historical info. + return *originalHistoricalInfo; + } + // Level down. + const int maxLevelDownAmonut = elapsedTimeStep / MAX_ELAPSED_TIME_STEP_COUNT; + const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ? + originalHistoricalInfo->getLevel() : maxLevelDownAmonut; + const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() + + levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS; + return HistoricalInfo(adjustedTimestamp, + originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */); +} + /* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, const int unigramCount, const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy) { @@ -167,6 +196,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT } } +/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) { + return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS; +} + ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { // Table entry is as follows: // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127. diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index 3e5bdf6b2..76d172e0b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -36,18 +36,26 @@ class ForgettingCurveUtils { static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT_AFTER_GC; - static const HistoricalInfo createUpdatedHistoricalInfoFrom( + static const HistoricalInfo createUpdatedHistoricalInfo( const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const int timestamp); + static const HistoricalInfo createHistoricalInfoToSave( + const HistoricalInfo *const originalHistoricalInfo); + static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbability); + // TODO: Remove. static int getUpdatedEncodedProbability(const int originalEncodedProbability, const int newProbability); + // TODO: Remove. static int isValidEncodedProbability(const int encodedProbability); + static bool needsToKeep(const HistoricalInfo *const historicalInfo); + + // TODO: Remove. static int getEncodedProbabilityToSave(const int encodedProbability, const DictionaryHeaderStructurePolicy *const headerPolicy); @@ -84,12 +92,17 @@ class ForgettingCurveUtils { static const int MAX_LEVEL; static const int MAX_COUNT; static const int MIN_VALID_LEVEL; + static const int TIME_STEP_DURATION_IN_SECONDS; + static const int MAX_ELAPSED_TIME_STEP_COUNT; + static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; static const ProbabilityTable sProbabilityTable; static int decodeProbability(const int encodedProbability); static int backoff(const int unigramProbability); + + static int getElapsedTimeStepCount(const int timestamp); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */