Merge "Check header attributes for forgetting curve."
This commit is contained in:
commit
94080a37e8
13 changed files with 96 additions and 61 deletions
|
@ -44,8 +44,6 @@ class DictionaryHeaderStructurePolicy {
|
|||
|
||||
virtual float getMultiWordCostMultiplier() const = 0;
|
||||
|
||||
virtual int getLastDecayedTime() const = 0;
|
||||
|
||||
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||
int outValueSize) const = 0;
|
||||
|
||||
|
|
|
@ -37,7 +37,8 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
|
|||
if (outProbability) {
|
||||
if (bigramEntry.hasHistoricalInfo()) {
|
||||
*outProbability =
|
||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
|
||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
|
||||
mHeaderPolicy);
|
||||
} else {
|
||||
*outProbability = bigramEntry.getProbability();
|
||||
}
|
||||
|
@ -160,7 +161,7 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
|||
}
|
||||
} else if (bigramEntry.hasHistoricalInfo()) {
|
||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
bigramEntry.getHistoricalInfo());
|
||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
|
||||
const BigramEntry updatedBigramEntry =
|
||||
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
||||
|
@ -230,7 +231,8 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
|||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp,
|
||||
mHeaderPolicy);
|
||||
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||
} else {
|
||||
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
||||
|
|
|
@ -35,6 +35,8 @@ const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
|
|||
const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
|
||||
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
|
||||
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
||||
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 4;
|
||||
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 0;
|
||||
|
||||
// Used for logging. Question mark is used to indicate that the key is not found.
|
||||
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||
|
|
|
@ -159,6 +159,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
return &mAttributeMap;
|
||||
}
|
||||
|
||||
// Returns the occurrence count at which ForgettingCurveUtils treats a word as ready to level up
// (it compares the incremented count against this value with >=).
// This accessor returns the compile-time default
// DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; it does not read the header attribute map.
AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
|
||||
return DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
|
||||
}
|
||||
|
||||
// Returns the id of the forgetting-curve probability table to use; ForgettingCurveUtils passes it
// as the table index when decoding a probability from historical info.
// This accessor returns the compile-time default
// DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; it does not read the header attribute map.
AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
|
||||
return DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
|
||||
}
|
||||
|
||||
void readHeaderValueOrQuestionMark(const char *const key,
|
||||
int *outValue, int outValueSize) const;
|
||||
|
||||
|
@ -185,6 +193,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
static const char *const LOCALE_KEY;
|
||||
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
|
||||
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
|
||||
static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
|
||||
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
|
||||
|
||||
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
|
||||
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
|
||||
|
|
|
@ -65,7 +65,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
|||
mProbabilityDictContent->getProbabilityEntry(terminalId);
|
||||
if (probabilityEntry.hasHistoricalInfo()) {
|
||||
probability = ForgettingCurveUtils::decodeProbability(
|
||||
probabilityEntry.getHistoricalInfo());
|
||||
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
} else {
|
||||
probability = probabilityEntry.getProbability();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
class HeaderPolicy;
|
||||
class ProbabilityDictContent;
|
||||
|
||||
/*
|
||||
|
@ -35,8 +36,10 @@ class ProbabilityDictContent;
|
|||
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||
public:
|
||||
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||
const ProbabilityDictContent *const probabilityDictContent)
|
||||
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
|
||||
const ProbabilityDictContent *const probabilityDictContent,
|
||||
const HeaderPolicy *const headerPolicy)
|
||||
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
|
||||
mHeaderPolicy(headerPolicy) {}
|
||||
|
||||
~Ver4PatriciaTrieNodeReader() {}
|
||||
|
||||
|
@ -50,6 +53,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
|||
|
||||
const BufferWithExtendableBuffer *const mBuffer;
|
||||
const ProbabilityDictContent *const mProbabilityDictContent;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
|
||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||
const int siblingNodePos) const;
|
||||
|
|
|
@ -159,7 +159,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
|
|||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
if (originalProbabilityEntry.hasHistoricalInfo()) {
|
||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
originalProbabilityEntry.getHistoricalInfo());
|
||||
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
||||
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||
|
@ -382,10 +382,11 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
|||
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
||||
const int timestamp) const {
|
||||
// TODO: Consolidate historical info and probability.
|
||||
if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
|
||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
|
||||
mHeaderPolicy);
|
||||
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
|
||||
&updatedHistoricalInfo);
|
||||
} else {
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
class HeaderPolicy;
|
||||
class Ver4BigramListPolicy;
|
||||
class Ver4DictBuffers;
|
||||
class Ver4PatriciaTrieNodeReader;
|
||||
|
@ -40,10 +41,11 @@ class Ver4ShortcutListPolicy;
|
|||
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||
public:
|
||||
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
||||
Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader,
|
||||
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
|
||||
const PtNodeReader *const ptNodeReader,
|
||||
const PtNodeArrayReader *const ptNodeArrayReader,
|
||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||
: mTrieBuffer(trieBuffer), mBuffers(buffers),
|
||||
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
||||
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
||||
mShortcutPolicy(shortcutPolicy) {}
|
||||
|
||||
|
@ -116,6 +118,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
|
||||
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||
Ver4DictBuffers *const mBuffers;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
DynamicPtReadingHelper mReadingHelper;
|
||||
Ver4BigramListPolicy *const mBigramPolicy;
|
||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||
|
|
|
@ -382,7 +382,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
|||
bigramWord1CodePoints + codePointCount);
|
||||
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
|
||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
||||
bigramEntry.getProbability();
|
||||
bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
|
||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
|
|
|
@ -47,10 +47,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
|
||||
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
|
||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent(), mHeaderPolicy),
|
||||
mPtNodeArrayReader(mDictBuffer),
|
||||
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader,
|
||||
&mBigramPolicy, &mShortcutPolicy),
|
||||
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||
|
|
|
@ -74,14 +74,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
||||
int *const outUnigramCount, int *const outBigramCount) {
|
||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||
mBuffers->getProbabilityDictContent());
|
||||
mBuffers->getProbabilityDictContent(), headerPolicy);
|
||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
|
||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||
mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
|
||||
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||
&shortcutPolicy);
|
||||
|
||||
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
|
@ -126,7 +127,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
|
||||
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||
&shortcutPolicy);
|
||||
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
||||
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
||||
|
@ -137,14 +139,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
|
||||
// Create policy instances for the GCed dictionary.
|
||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||
buffersToWrite->getProbabilityDictContent());
|
||||
buffersToWrite->getProbabilityDictContent(), headerPolicy);
|
||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
|
||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
||||
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
||||
&newShortcutPolicy);
|
||||
// Re-assign terminal IDs for valid terminal PtNodes.
|
||||
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
||||
|
@ -202,8 +204,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
|||
const ProbabilityEntry probabilityEntry =
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
|
||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) :
|
||||
probabilityEntry.getProbability();
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
probabilityEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
}
|
||||
|
@ -245,8 +248,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
|||
continue;
|
||||
}
|
||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
|
||||
bigramEntry.getProbability();
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
bigramEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(entryPos, probability,
|
||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <cmath>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
#include "utils/time_keeper.h"
|
||||
|
||||
|
@ -34,7 +34,6 @@ const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
|
|||
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
||||
|
||||
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
||||
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
||||
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
||||
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
|
||||
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
|
||||
|
@ -45,7 +44,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
// TODO: Revise the logic to decide the initial probability depending on the given probability.
|
||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const originalHistoricalInfo,
|
||||
const int newProbability, const int timestamp) {
|
||||
const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) {
|
||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
|
||||
} else if (!originalHistoricalInfo->isValid()) {
|
||||
|
@ -53,7 +52,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
|
||||
} else {
|
||||
const int updatedCount = originalHistoricalInfo->getCount() + 1;
|
||||
if (updatedCount > MAX_COUNT) {
|
||||
if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
|
||||
// The count has reached the number of occurrences required to level up.
|
||||
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
|
||||
// The level is already max.
|
||||
|
@ -71,9 +70,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
}
|
||||
|
||||
/* static */ int ForgettingCurveUtils::decodeProbability(
|
||||
const HistoricalInfo *const historicalInfo) {
|
||||
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
|
||||
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
|
||||
return sProbabilityTable.getProbability(historicalInfo->getLevel(),
|
||||
return sProbabilityTable.getProbability(
|
||||
headerPolicy->getForgettingCurveProbabilityValuesTableId(), historicalInfo->getLevel(),
|
||||
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
}
|
||||
|
||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
const HistoricalInfo *const originalHistoricalInfo) {
|
||||
const HistoricalInfo *const originalHistoricalInfo,
|
||||
const HeaderPolicy *const headerPolicy) {
|
||||
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||
return HistoricalInfo();
|
||||
}
|
||||
|
@ -115,8 +116,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
}
|
||||
|
||||
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||
const int unigramCount, const int bigramCount,
|
||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||
const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
|
||||
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
||||
// Unigram count exceeds the limit.
|
||||
return true;
|
||||
|
@ -148,24 +148,30 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
|
||||
}
|
||||
|
||||
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
|
||||
mTable.resize(MAX_LEVEL + 1);
|
||||
for (int level = 0; level <= MAX_LEVEL; ++level) {
|
||||
mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
||||
const float initialProbability =
|
||||
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
|
||||
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
|
||||
if (level == 0) {
|
||||
mTable[level][timeStepCount] = NOT_A_PROBABILITY;
|
||||
continue;
|
||||
const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 1;
|
||||
|
||||
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
|
||||
mTables.resize(PROBABILITY_TABLE_COUNT);
|
||||
for (int tableId = 0; tableId < PROBABILITY_TABLE_COUNT; ++tableId) {
|
||||
mTables[tableId].resize(MAX_LEVEL + 1);
|
||||
for (int level = 0; level <= MAX_LEVEL; ++level) {
|
||||
mTables[tableId][level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
||||
const float initialProbability =
|
||||
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
|
||||
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT;
|
||||
++timeStepCount) {
|
||||
if (level == 0) {
|
||||
mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY;
|
||||
continue;
|
||||
}
|
||||
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
|
||||
const float probability = initialProbability
|
||||
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
|
||||
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
|
||||
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
|
||||
mTables[tableId][level][timeStepCount] =
|
||||
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
|
||||
}
|
||||
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
|
||||
const float probability = initialProbability
|
||||
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
|
||||
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
|
||||
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
|
||||
mTable[level][timeStepCount] =
|
||||
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class DictionaryHeaderStructurePolicy;
|
||||
class HeaderPolicy;
|
||||
|
||||
class ForgettingCurveUtils {
|
||||
public:
|
||||
|
@ -35,12 +35,14 @@ class ForgettingCurveUtils {
|
|||
|
||||
static const HistoricalInfo createUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||
const int timestamp);
|
||||
const int timestamp, const HeaderPolicy *const headerPolicy);
|
||||
|
||||
static const HistoricalInfo createHistoricalInfoToSave(
|
||||
const HistoricalInfo *const originalHistoricalInfo);
|
||||
const HistoricalInfo *const originalHistoricalInfo,
|
||||
const HeaderPolicy *const headerPolicy);
|
||||
|
||||
static int decodeProbability(const HistoricalInfo *const historicalInfo);
|
||||
static int decodeProbability(const HistoricalInfo *const historicalInfo,
|
||||
const HeaderPolicy *const headerPolicy);
|
||||
|
||||
static int getProbability(const int encodedUnigramProbability,
|
||||
const int encodedBigramProbability);
|
||||
|
@ -48,7 +50,7 @@ class ForgettingCurveUtils {
|
|||
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
|
||||
|
||||
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
|
||||
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
|
||||
const int bigramCount, const HeaderPolicy *const headerPolicy);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
||||
|
@ -57,21 +59,23 @@ class ForgettingCurveUtils {
|
|||
public:
|
||||
ProbabilityTable();
|
||||
|
||||
int getProbability(const int level, const int elapsedTimeStepCount) const {
|
||||
return mTable[level][elapsedTimeStepCount];
|
||||
int getProbability(const int tableId, const int level,
|
||||
const int elapsedTimeStepCount) const {
|
||||
return mTables[tableId][level][elapsedTimeStepCount];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
|
||||
|
||||
std::vector<std::vector<int> > mTable;
|
||||
static const int PROBABILITY_TABLE_COUNT;
|
||||
|
||||
std::vector<std::vector<std::vector<int> > > mTables;
|
||||
};
|
||||
|
||||
static const int MAX_COMPUTED_PROBABILITY;
|
||||
static const int DECAY_INTERVAL_SECONDS;
|
||||
|
||||
static const int MAX_LEVEL;
|
||||
static const int MAX_COUNT;
|
||||
static const int MIN_VALID_LEVEL;
|
||||
static const int TIME_STEP_DURATION_IN_SECONDS;
|
||||
static const int MAX_ELAPSED_TIME_STEP_COUNT;
|
||||
|
|
Loading…
Reference in a new issue