Update historical info for GC.

Bug: 11073222

Change-Id: I08a61c02f9f5d527897095eee2de395f86050e2d
main
Keisuke Kuroyanagi 2013-12-09 21:02:41 +09:00
parent ebb57c02c2
commit 26266bd53b
4 changed files with 63 additions and 8 deletions

View File

@ -153,12 +153,16 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
return false; return false;
} }
} else if (mNeedsToDecayWhenUpdating) { } else if (mNeedsToDecayWhenUpdating) {
// TODO: Quit decaying probability during GC.
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave( const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
bigramEntry.getProbability(), mHeaderPolicy); bigramEntry.getProbability(), mHeaderPolicy);
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo());
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) { if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
const BigramEntry updatedBigramEntry = const BigramEntry updatedBigramEntry =
bigramEntry.updateProbabilityAndGetEntry(probability); bigramEntry.updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
@ -225,7 +229,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability( const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability); originalBigramEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp); originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
return originalBigramEntry->updateProbabilityAndGetEntry(probability) return originalBigramEntry->updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); .updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);

View File

@ -151,17 +151,22 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
const ProbabilityEntry originalProbabilityEntry = const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
// TODO: Use historical info. // TODO: Remove.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy()); originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability); originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d", AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
return false; return false;
} }
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability); const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
if (!isValid) { if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
@ -379,7 +384,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability( const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalProbabilityEntry->getProbability(), newProbability); originalProbabilityEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp); originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability) return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo); .createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);

View File

@ -42,10 +42,13 @@ const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3; const int ForgettingCurveUtils::MAX_LEVEL = 3;
const int ForgettingCurveUtils::MAX_COUNT = 3; const int ForgettingCurveUtils::MAX_COUNT = 3;
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom( /* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const HistoricalInfo *const originalHistoricalInfo,
const int newProbability, const int timestamp) { const int newProbability, const int timestamp) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
@ -110,6 +113,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
} }
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
return historicalInfo->getLevel() > 0
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp())
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability, /* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy) { const DictionaryHeaderStructurePolicy *const headerPolicy) {
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime(); const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
@ -129,6 +138,26 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return currentEncodedProbability; return currentEncodedProbability;
} }
// Builds the historical info that should be stored for an entry.
//  - If the original timestamp is NOT_A_TIMESTAMP, a default-constructed
//    HistoricalInfo is returned.
//  - If fewer than MAX_ELAPSED_TIME_STEP_COUNT time steps have elapsed since
//    the original timestamp, the original info is returned unchanged.
//  - Otherwise the level is lowered by one per MAX_ELAPSED_TIME_STEP_COUNT
//    elapsed time steps (never by more than the current level), the timestamp
//    is moved forward by the time span those level-downs account for, and the
//    count is reset to 0.
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
        const HistoricalInfo *const originalHistoricalInfo) {
    const int originalTimestamp = originalHistoricalInfo->getTimeStamp();
    if (originalTimestamp == NOT_A_TIMESTAMP) {
        return HistoricalInfo();
    }
    const int elapsedTimeStepCount = getElapsedTimeStepCount(originalTimestamp);
    if (elapsedTimeStepCount < MAX_ELAPSED_TIME_STEP_COUNT) {
        // Not enough time has passed; keep the historical info as it is.
        return *originalHistoricalInfo;
    }
    // Level down, clamping the amount so that the resulting level is the
    // original level minus at most itself.
    const int originalLevel = originalHistoricalInfo->getLevel();
    int levelDownAmount = elapsedTimeStepCount / MAX_ELAPSED_TIME_STEP_COUNT;
    if (levelDownAmount >= originalLevel) {
        levelDownAmount = originalLevel;
    }
    // Advance the timestamp by the duration consumed by the applied level-downs.
    const int timestampShiftInSeconds =
            levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
    return HistoricalInfo(originalTimestamp + timestampShiftInSeconds,
            originalLevel - levelDownAmount, 0 /* count */);
}
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, /* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
const int unigramCount, const int bigramCount, const int unigramCount, const int bigramCount,
const DictionaryHeaderStructurePolicy *const headerPolicy) { const DictionaryHeaderStructurePolicy *const headerPolicy) {
@ -167,6 +196,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
} }
// Returns how many whole time steps of TIME_STEP_DURATION_IN_SECONDS lie
// between the given timestamp and the time reported by
// TimeKeeper::peekCurrentTime() (integer division).
/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
    const int elapsedSeconds = TimeKeeper::peekCurrentTime() - timestamp;
    return elapsedSeconds / TIME_STEP_DURATION_IN_SECONDS;
}
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
// Table entry is as follows: // Table entry is as follows:
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127. // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.

View File

@ -36,18 +36,26 @@ class ForgettingCurveUtils {
static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC; static const int MAX_BIGRAM_COUNT_AFTER_GC;
static const HistoricalInfo createUpdatedHistoricalInfoFrom( static const HistoricalInfo createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
const int timestamp); const int timestamp);
static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo);
static int getProbability(const int encodedUnigramProbability, static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability); const int encodedBigramProbability);
// TODO: Remove.
static int getUpdatedEncodedProbability(const int originalEncodedProbability, static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability); const int newProbability);
// TODO: Remove.
static int isValidEncodedProbability(const int encodedProbability); static int isValidEncodedProbability(const int encodedProbability);
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
// TODO: Remove.
static int getEncodedProbabilityToSave(const int encodedProbability, static int getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy); const DictionaryHeaderStructurePolicy *const headerPolicy);
@ -84,12 +92,17 @@ class ForgettingCurveUtils {
static const int MAX_LEVEL; static const int MAX_LEVEL;
static const int MAX_COUNT; static const int MAX_COUNT;
static const int MIN_VALID_LEVEL; static const int MIN_VALID_LEVEL;
static const int TIME_STEP_DURATION_IN_SECONDS;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
static const ProbabilityTable sProbabilityTable; static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability); static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability); static int backoff(const int unigramProbability);
static int getElapsedTimeStepCount(const int timestamp);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */