am 26266bd5: Update historical info for GC.
* commit '26266bd53b06facd4ef7da3de6e0a52ea82b4482': Update historical info for GC.main
commit
725cb18957
|
@ -153,12 +153,16 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else if (mNeedsToDecayWhenUpdating) {
|
} else if (mNeedsToDecayWhenUpdating) {
|
||||||
// TODO: Quit decaying probability during GC.
|
|
||||||
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||||
bigramEntry.getProbability(), mHeaderPolicy);
|
bigramEntry.getProbability(), mHeaderPolicy);
|
||||||
|
const HistoricalInfo historicalInfo =
|
||||||
|
ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
|
bigramEntry.getHistoricalInfo());
|
||||||
|
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
|
||||||
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
|
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
|
||||||
const BigramEntry updatedBigramEntry =
|
const BigramEntry updatedBigramEntry =
|
||||||
bigramEntry.updateProbabilityAndGetEntry(probability);
|
bigramEntry.updateProbabilityAndGetEntry(probability)
|
||||||
|
.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
||||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -225,7 +229,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||||
originalBigramEntry->getProbability(), newProbability);
|
originalBigramEntry->getProbability(), newProbability);
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||||
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
|
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
|
||||||
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||||
|
|
|
@ -151,17 +151,22 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
|
||||||
const ProbabilityEntry originalProbabilityEntry =
|
const ProbabilityEntry originalProbabilityEntry =
|
||||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||||
toBeUpdatedPtNodeParams->getTerminalId());
|
toBeUpdatedPtNodeParams->getTerminalId());
|
||||||
// TODO: Use historical info.
|
// TODO: Remove.
|
||||||
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||||
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
|
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
|
||||||
|
const HistoricalInfo historicalInfo =
|
||||||
|
ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
|
originalProbabilityEntry.getHistoricalInfo());
|
||||||
const ProbabilityEntry probabilityEntry =
|
const ProbabilityEntry probabilityEntry =
|
||||||
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability);
|
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
|
||||||
|
.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
||||||
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
|
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
|
||||||
AKLOGE("Cannot write updated probability entry. terminalId: %d",
|
AKLOGE("Cannot write updated probability entry. terminalId: %d",
|
||||||
toBeUpdatedPtNodeParams->getTerminalId());
|
toBeUpdatedPtNodeParams->getTerminalId());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
|
||||||
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
|
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
|
||||||
if (!isValid) {
|
if (!isValid) {
|
||||||
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
|
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
|
||||||
|
@ -379,7 +384,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
||||||
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||||
originalProbabilityEntry->getProbability(), newProbability);
|
originalProbabilityEntry->getProbability(), newProbability);
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
|
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
|
||||||
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
|
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
|
||||||
|
|
|
@ -42,10 +42,13 @@ const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
||||||
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
||||||
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
||||||
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
||||||
|
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
|
||||||
|
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
|
||||||
|
const int ForgettingCurveUtils::DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD = 14;
|
||||||
|
|
||||||
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||||
|
|
||||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
const HistoricalInfo *const originalHistoricalInfo,
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
const int newProbability, const int timestamp) {
|
const int newProbability, const int timestamp) {
|
||||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||||
|
@ -110,6 +113,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
|
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
|
||||||
|
return historicalInfo->getLevel() > 0
|
||||||
|
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp())
|
||||||
|
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
|
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
|
||||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||||
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
|
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
|
||||||
|
@ -129,6 +138,26 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
return currentEncodedProbability;
|
return currentEncodedProbability;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
|
const HistoricalInfo *const originalHistoricalInfo) {
|
||||||
|
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||||
|
return HistoricalInfo();
|
||||||
|
}
|
||||||
|
const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
|
||||||
|
if (elapsedTimeStep < MAX_ELAPSED_TIME_STEP_COUNT) {
|
||||||
|
// No need to update historical info.
|
||||||
|
return *originalHistoricalInfo;
|
||||||
|
}
|
||||||
|
// Level down.
|
||||||
|
const int maxLevelDownAmonut = elapsedTimeStep / MAX_ELAPSED_TIME_STEP_COUNT;
|
||||||
|
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
|
||||||
|
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
|
||||||
|
const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
|
||||||
|
levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
|
||||||
|
return HistoricalInfo(adjustedTimestamp,
|
||||||
|
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||||
const int unigramCount, const int bigramCount,
|
const int unigramCount, const int bigramCount,
|
||||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||||
|
@ -167,6 +196,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) {
|
||||||
|
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
|
||||||
|
}
|
||||||
|
|
||||||
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
|
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
|
||||||
// Table entry is as follows:
|
// Table entry is as follows:
|
||||||
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
|
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
|
||||||
|
|
|
@ -36,18 +36,26 @@ class ForgettingCurveUtils {
|
||||||
static const int MAX_BIGRAM_COUNT;
|
static const int MAX_BIGRAM_COUNT;
|
||||||
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
||||||
|
|
||||||
static const HistoricalInfo createUpdatedHistoricalInfoFrom(
|
static const HistoricalInfo createUpdatedHistoricalInfo(
|
||||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||||
const int timestamp);
|
const int timestamp);
|
||||||
|
|
||||||
|
static const HistoricalInfo createHistoricalInfoToSave(
|
||||||
|
const HistoricalInfo *const originalHistoricalInfo);
|
||||||
|
|
||||||
static int getProbability(const int encodedUnigramProbability,
|
static int getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability);
|
const int encodedBigramProbability);
|
||||||
|
|
||||||
|
// TODO: Remove.
|
||||||
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
|
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
|
||||||
const int newProbability);
|
const int newProbability);
|
||||||
|
|
||||||
|
// TODO: Remove.
|
||||||
static int isValidEncodedProbability(const int encodedProbability);
|
static int isValidEncodedProbability(const int encodedProbability);
|
||||||
|
|
||||||
|
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
|
||||||
|
|
||||||
|
// TODO: Remove.
|
||||||
static int getEncodedProbabilityToSave(const int encodedProbability,
|
static int getEncodedProbabilityToSave(const int encodedProbability,
|
||||||
const DictionaryHeaderStructurePolicy *const headerPolicy);
|
const DictionaryHeaderStructurePolicy *const headerPolicy);
|
||||||
|
|
||||||
|
@ -84,12 +92,17 @@ class ForgettingCurveUtils {
|
||||||
static const int MAX_LEVEL;
|
static const int MAX_LEVEL;
|
||||||
static const int MAX_COUNT;
|
static const int MAX_COUNT;
|
||||||
static const int MIN_VALID_LEVEL;
|
static const int MIN_VALID_LEVEL;
|
||||||
|
static const int TIME_STEP_DURATION_IN_SECONDS;
|
||||||
|
static const int MAX_ELAPSED_TIME_STEP_COUNT;
|
||||||
|
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||||
|
|
||||||
static const ProbabilityTable sProbabilityTable;
|
static const ProbabilityTable sProbabilityTable;
|
||||||
|
|
||||||
static int decodeProbability(const int encodedProbability);
|
static int decodeProbability(const int encodedProbability);
|
||||||
|
|
||||||
static int backoff(const int unigramProbability);
|
static int backoff(const int unigramProbability);
|
||||||
|
|
||||||
|
static int getElapsedTimeStepCount(const int timestamp);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
|
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
|
||||||
|
|
Loading…
Reference in New Issue