am 26266bd5
: Update historical info for GC.
* commit '26266bd53b06facd4ef7da3de6e0a52ea82b4482': Update historical info for GC.
This commit is contained in:
commit
725cb18957
4 changed files with 63 additions and 8 deletions
|
@ -153,12 +153,16 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
|||
return false;
|
||||
}
|
||||
} else if (mNeedsToDecayWhenUpdating) {
|
||||
// TODO: Quit decaying probability during GC.
|
||||
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||
bigramEntry.getProbability(), mHeaderPolicy);
|
||||
const HistoricalInfo historicalInfo =
|
||||
ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
bigramEntry.getHistoricalInfo());
|
||||
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
|
||||
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
|
||||
const BigramEntry updatedBigramEntry =
|
||||
bigramEntry.updateProbabilityAndGetEntry(probability);
|
||||
bigramEntry.updateProbabilityAndGetEntry(probability)
|
||||
.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -225,7 +229,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
|||
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||
originalBigramEntry->getProbability(), newProbability);
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
|
||||
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||
|
|
|
@ -151,17 +151,22 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
|
|||
const ProbabilityEntry originalProbabilityEntry =
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
// TODO: Use historical info.
|
||||
// TODO: Remove.
|
||||
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
|
||||
const HistoricalInfo historicalInfo =
|
||||
ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
originalProbabilityEntry.getHistoricalInfo());
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability);
|
||||
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
|
||||
.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
||||
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
|
||||
AKLOGE("Cannot write updated probability entry. terminalId: %d",
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
|
||||
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
|
||||
if (!isValid) {
|
||||
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
|
||||
|
@ -379,7 +384,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
|||
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||
originalProbabilityEntry->getProbability(), newProbability);
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
|
||||
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
|
||||
|
|
|
@ -42,10 +42,13 @@ const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
|||
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
||||
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
||||
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
||||
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
|
||||
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
|
||||
const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
|
||||
|
||||
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||
|
||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const originalHistoricalInfo,
|
||||
const int newProbability, const int timestamp) {
|
||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||
|
@ -110,6 +113,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
|
||||
}
|
||||
|
||||
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
|
||||
return historicalInfo->getLevel() > 0
|
||||
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp())
|
||||
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||
}
|
||||
|
||||
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
|
||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
|
||||
|
@ -129,6 +138,26 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
return currentEncodedProbability;
|
||||
}
|
||||
|
||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
const HistoricalInfo *const originalHistoricalInfo) {
|
||||
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||
return HistoricalInfo();
|
||||
}
|
||||
const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
|
||||
if (elapsedTimeStep < MAX_ELAPSED_TIME_STEP_COUNT) {
|
||||
// No need to update historical info.
|
||||
return *originalHistoricalInfo;
|
||||
}
|
||||
// Level down.
|
||||
const int maxLevelDownAmonut = elapsedTimeStep / MAX_ELAPSED_TIME_STEP_COUNT;
|
||||
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
|
||||
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
|
||||
const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
|
||||
levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
|
||||
return HistoricalInfo(adjustedTimestamp,
|
||||
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
|
||||
}
|
||||
|
||||
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||
const int unigramCount, const int bigramCount,
|
||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||
|
@ -167,6 +196,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
|
||||
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
|
||||
}
|
||||
|
||||
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
|
||||
// Table entry is as follows:
|
||||
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
|
||||
|
|
|
@ -36,18 +36,26 @@ class ForgettingCurveUtils {
|
|||
static const int MAX_BIGRAM_COUNT;
|
||||
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
||||
|
||||
static const HistoricalInfo createUpdatedHistoricalInfoFrom(
|
||||
static const HistoricalInfo createUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||
const int timestamp);
|
||||
|
||||
static const HistoricalInfo createHistoricalInfoToSave(
|
||||
const HistoricalInfo *const originalHistoricalInfo);
|
||||
|
||||
static int getProbability(const int encodedUnigramProbability,
|
||||
const int encodedBigramProbability);
|
||||
|
||||
// TODO: Remove.
|
||||
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
|
||||
const int newProbability);
|
||||
|
||||
// TODO: Remove.
|
||||
static int isValidEncodedProbability(const int encodedProbability);
|
||||
|
||||
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
|
||||
|
||||
// TODO: Remove.
|
||||
static int getEncodedProbabilityToSave(const int encodedProbability,
|
||||
const DictionaryHeaderStructurePolicy *const headerPolicy);
|
||||
|
||||
|
@ -84,12 +92,17 @@ class ForgettingCurveUtils {
|
|||
static const int MAX_LEVEL;
|
||||
static const int MAX_COUNT;
|
||||
static const int MIN_VALID_LEVEL;
|
||||
static const int TIME_STEP_DURATION_IN_SECONDS;
|
||||
static const int MAX_ELAPSED_TIME_STEP_COUNT;
|
||||
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||
|
||||
static const ProbabilityTable sProbabilityTable;
|
||||
|
||||
static int decodeProbability(const int encodedProbability);
|
||||
|
||||
static int backoff(const int unigramProbability);
|
||||
|
||||
static int getElapsedTimeStepCount(const int timestamp);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
|
||||
|
|
Loading…
Reference in a new issue