Merge "Check header attributes for forgetting curve."
This commit is contained in:
commit
94080a37e8
13 changed files with 96 additions and 61 deletions
|
@ -44,8 +44,6 @@ class DictionaryHeaderStructurePolicy {
|
||||||
|
|
||||||
virtual float getMultiWordCostMultiplier() const = 0;
|
virtual float getMultiWordCostMultiplier() const = 0;
|
||||||
|
|
||||||
virtual int getLastDecayedTime() const = 0;
|
|
||||||
|
|
||||||
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||||
int outValueSize) const = 0;
|
int outValueSize) const = 0;
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,8 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
|
||||||
if (outProbability) {
|
if (outProbability) {
|
||||||
if (bigramEntry.hasHistoricalInfo()) {
|
if (bigramEntry.hasHistoricalInfo()) {
|
||||||
*outProbability =
|
*outProbability =
|
||||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
|
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
|
||||||
|
mHeaderPolicy);
|
||||||
} else {
|
} else {
|
||||||
*outProbability = bigramEntry.getProbability();
|
*outProbability = bigramEntry.getProbability();
|
||||||
}
|
}
|
||||||
|
@ -160,7 +161,7 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
||||||
}
|
}
|
||||||
} else if (bigramEntry.hasHistoricalInfo()) {
|
} else if (bigramEntry.hasHistoricalInfo()) {
|
||||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
bigramEntry.getHistoricalInfo());
|
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||||
if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
|
if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
|
||||||
const BigramEntry updatedBigramEntry =
|
const BigramEntry updatedBigramEntry =
|
||||||
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
||||||
|
@ -230,7 +231,8 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp,
|
||||||
|
mHeaderPolicy);
|
||||||
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
||||||
|
|
|
@ -35,6 +35,8 @@ const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
|
||||||
const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
|
const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
|
||||||
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
|
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
|
||||||
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
||||||
|
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 4;
|
||||||
|
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 0;
|
||||||
|
|
||||||
// Used for logging. Question mark is used to indicate that the key is not found.
|
// Used for logging. Question mark is used to indicate that the key is not found.
|
||||||
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||||
|
|
|
@ -159,6 +159,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
return &mAttributeMap;
|
return &mAttributeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
|
||||||
|
return DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
|
||||||
|
return DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
|
||||||
|
}
|
||||||
|
|
||||||
void readHeaderValueOrQuestionMark(const char *const key,
|
void readHeaderValueOrQuestionMark(const char *const key,
|
||||||
int *outValue, int outValueSize) const;
|
int *outValue, int outValueSize) const;
|
||||||
|
|
||||||
|
@ -185,6 +193,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
static const char *const LOCALE_KEY;
|
static const char *const LOCALE_KEY;
|
||||||
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
|
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
|
||||||
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
|
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
|
||||||
|
static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
|
||||||
|
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
|
||||||
|
|
||||||
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
|
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
|
||||||
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
|
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
|
||||||
|
|
|
@ -65,7 +65,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
|
||||||
mProbabilityDictContent->getProbabilityEntry(terminalId);
|
mProbabilityDictContent->getProbabilityEntry(terminalId);
|
||||||
if (probabilityEntry.hasHistoricalInfo()) {
|
if (probabilityEntry.hasHistoricalInfo()) {
|
||||||
probability = ForgettingCurveUtils::decodeProbability(
|
probability = ForgettingCurveUtils::decodeProbability(
|
||||||
probabilityEntry.getHistoricalInfo());
|
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||||
} else {
|
} else {
|
||||||
probability = probabilityEntry.getProbability();
|
probability = probabilityEntry.getProbability();
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
|
class HeaderPolicy;
|
||||||
class ProbabilityDictContent;
|
class ProbabilityDictContent;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -35,8 +36,10 @@ class ProbabilityDictContent;
|
||||||
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||||
public:
|
public:
|
||||||
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||||
const ProbabilityDictContent *const probabilityDictContent)
|
const ProbabilityDictContent *const probabilityDictContent,
|
||||||
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {}
|
const HeaderPolicy *const headerPolicy)
|
||||||
|
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
|
||||||
|
mHeaderPolicy(headerPolicy) {}
|
||||||
|
|
||||||
~Ver4PatriciaTrieNodeReader() {}
|
~Ver4PatriciaTrieNodeReader() {}
|
||||||
|
|
||||||
|
@ -50,6 +53,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const ProbabilityDictContent *const mProbabilityDictContent;
|
const ProbabilityDictContent *const mProbabilityDictContent;
|
||||||
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
|
|
||||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||||
const int siblingNodePos) const;
|
const int siblingNodePos) const;
|
||||||
|
|
|
@ -159,7 +159,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
|
||||||
toBeUpdatedPtNodeParams->getTerminalId());
|
toBeUpdatedPtNodeParams->getTerminalId());
|
||||||
if (originalProbabilityEntry.hasHistoricalInfo()) {
|
if (originalProbabilityEntry.hasHistoricalInfo()) {
|
||||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
originalProbabilityEntry.getHistoricalInfo());
|
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||||
const ProbabilityEntry probabilityEntry =
|
const ProbabilityEntry probabilityEntry =
|
||||||
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
||||||
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||||
|
@ -382,10 +382,11 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
||||||
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
||||||
const int timestamp) const {
|
const int timestamp) const {
|
||||||
// TODO: Consolidate historical info and probability.
|
// TODO: Consolidate historical info and probability.
|
||||||
if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
|
||||||
|
mHeaderPolicy);
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
|
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
|
||||||
&updatedHistoricalInfo);
|
&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
|
class HeaderPolicy;
|
||||||
class Ver4BigramListPolicy;
|
class Ver4BigramListPolicy;
|
||||||
class Ver4DictBuffers;
|
class Ver4DictBuffers;
|
||||||
class Ver4PatriciaTrieNodeReader;
|
class Ver4PatriciaTrieNodeReader;
|
||||||
|
@ -40,10 +41,11 @@ class Ver4ShortcutListPolicy;
|
||||||
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
public:
|
public:
|
||||||
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
||||||
Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader,
|
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
|
||||||
|
const PtNodeReader *const ptNodeReader,
|
||||||
const PtNodeArrayReader *const ptNodeArrayReader,
|
const PtNodeArrayReader *const ptNodeArrayReader,
|
||||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||||
: mTrieBuffer(trieBuffer), mBuffers(buffers),
|
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
||||||
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
||||||
mShortcutPolicy(shortcutPolicy) {}
|
mShortcutPolicy(shortcutPolicy) {}
|
||||||
|
|
||||||
|
@ -116,6 +118,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mTrieBuffer;
|
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||||
Ver4DictBuffers *const mBuffers;
|
Ver4DictBuffers *const mBuffers;
|
||||||
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
DynamicPtReadingHelper mReadingHelper;
|
DynamicPtReadingHelper mReadingHelper;
|
||||||
Ver4BigramListPolicy *const mBigramPolicy;
|
Ver4BigramListPolicy *const mBigramPolicy;
|
||||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
|
@ -382,7 +382,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
|
||||||
bigramWord1CodePoints + codePointCount);
|
bigramWord1CodePoints + codePointCount);
|
||||||
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
|
||||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
|
ForgettingCurveUtils::decodeProbability(
|
||||||
|
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
||||||
bigramEntry.getProbability();
|
bigramEntry.getProbability();
|
||||||
bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
|
bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||||
|
|
|
@ -47,10 +47,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
|
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
|
||||||
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent(), mHeaderPolicy),
|
||||||
mPtNodeArrayReader(mDictBuffer),
|
mPtNodeArrayReader(mDictBuffer),
|
||||||
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader,
|
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||||
&mBigramPolicy, &mShortcutPolicy),
|
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
|
||||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||||
mWritingHelper(mBuffers.get()),
|
mWritingHelper(mBuffers.get()),
|
||||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||||
|
|
|
@ -74,14 +74,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
||||||
int *const outUnigramCount, int *const outBigramCount) {
|
int *const outUnigramCount, int *const outBigramCount) {
|
||||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||||
mBuffers->getProbabilityDictContent());
|
mBuffers->getProbabilityDictContent(), headerPolicy);
|
||||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||||
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
|
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
|
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
|
||||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable());
|
mBuffers->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||||
mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
|
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||||
|
&shortcutPolicy);
|
||||||
|
|
||||||
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
|
@ -126,7 +127,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
||||||
buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy);
|
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||||
|
&shortcutPolicy);
|
||||||
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
||||||
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
||||||
|
@ -137,14 +139,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
|
|
||||||
// Create policy instances for the GCed dictionary.
|
// Create policy instances for the GCed dictionary.
|
||||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||||
buffersToWrite->getProbabilityDictContent());
|
buffersToWrite->getProbabilityDictContent(), headerPolicy);
|
||||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||||
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
|
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
|
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
|
||||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable());
|
buffersToWrite->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||||
buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
||||||
&newShortcutPolicy);
|
&newShortcutPolicy);
|
||||||
// Re-assign terminal IDs for valid terminal PtNodes.
|
// Re-assign terminal IDs for valid terminal PtNodes.
|
||||||
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
||||||
|
@ -202,8 +204,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
||||||
const ProbabilityEntry probabilityEntry =
|
const ProbabilityEntry probabilityEntry =
|
||||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
|
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
|
||||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||||
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) :
|
ForgettingCurveUtils::decodeProbability(
|
||||||
probabilityEntry.getProbability();
|
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||||
|
probabilityEntry.getProbability();
|
||||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
priorityQueue.push(DictProbability(terminalPos, probability,
|
||||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
||||||
}
|
}
|
||||||
|
@ -245,8 +248,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) :
|
ForgettingCurveUtils::decodeProbability(
|
||||||
bigramEntry.getProbability();
|
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||||
|
bigramEntry.getProbability();
|
||||||
priorityQueue.push(DictProbability(entryPos, probability,
|
priorityQueue.push(DictProbability(entryPos, probability,
|
||||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
#include "utils/time_keeper.h"
|
#include "utils/time_keeper.h"
|
||||||
|
|
||||||
|
@ -34,7 +34,6 @@ const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
|
||||||
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
||||||
|
|
||||||
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
||||||
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
|
||||||
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
||||||
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
|
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
|
||||||
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
|
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
|
||||||
|
@ -45,7 +44,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
// TODO: Revise the logic to decide the initial probability depending on the given probability.
|
// TODO: Revise the logic to decide the initial probability depending on the given probability.
|
||||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
const HistoricalInfo *const originalHistoricalInfo,
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
const int newProbability, const int timestamp) {
|
const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) {
|
||||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||||
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
|
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
|
||||||
} else if (!originalHistoricalInfo->isValid()) {
|
} else if (!originalHistoricalInfo->isValid()) {
|
||||||
|
@ -53,7 +52,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
|
return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
|
||||||
} else {
|
} else {
|
||||||
const int updatedCount = originalHistoricalInfo->getCount() + 1;
|
const int updatedCount = originalHistoricalInfo->getCount() + 1;
|
||||||
if (updatedCount > MAX_COUNT) {
|
if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
|
||||||
// The count exceeds the max value the level can be incremented.
|
// The count exceeds the max value the level can be incremented.
|
||||||
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
|
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
|
||||||
// The level is already max.
|
// The level is already max.
|
||||||
|
@ -71,9 +70,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::decodeProbability(
|
/* static */ int ForgettingCurveUtils::decodeProbability(
|
||||||
const HistoricalInfo *const historicalInfo) {
|
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
|
||||||
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
|
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
|
||||||
return sProbabilityTable.getProbability(historicalInfo->getLevel(),
|
return sProbabilityTable.getProbability(
|
||||||
|
headerPolicy->getForgettingCurveProbabilityValuesTableId(), historicalInfo->getLevel(),
|
||||||
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
|
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,7 +95,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
const HistoricalInfo *const originalHistoricalInfo) {
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
|
const HeaderPolicy *const headerPolicy) {
|
||||||
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||||
return HistoricalInfo();
|
return HistoricalInfo();
|
||||||
}
|
}
|
||||||
|
@ -115,8 +116,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||||
const int unigramCount, const int bigramCount,
|
const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
|
||||||
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
|
||||||
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
||||||
// Unigram count exceeds the limit.
|
// Unigram count exceeds the limit.
|
||||||
return true;
|
return true;
|
||||||
|
@ -148,24 +148,30 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
|
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
|
||||||
}
|
}
|
||||||
|
|
||||||
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
|
const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 1;
|
||||||
mTable.resize(MAX_LEVEL + 1);
|
|
||||||
for (int level = 0; level <= MAX_LEVEL; ++level) {
|
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
|
||||||
mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
mTables.resize(PROBABILITY_TABLE_COUNT);
|
||||||
const float initialProbability =
|
for (int tableId = 0; tableId < PROBABILITY_TABLE_COUNT; ++tableId) {
|
||||||
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
|
mTables[tableId].resize(MAX_LEVEL + 1);
|
||||||
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
|
for (int level = 0; level <= MAX_LEVEL; ++level) {
|
||||||
if (level == 0) {
|
mTables[tableId][level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
||||||
mTable[level][timeStepCount] = NOT_A_PROBABILITY;
|
const float initialProbability =
|
||||||
continue;
|
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
|
||||||
|
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT;
|
||||||
|
++timeStepCount) {
|
||||||
|
if (level == 0) {
|
||||||
|
mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
|
||||||
|
const float probability = initialProbability
|
||||||
|
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
|
||||||
|
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
|
||||||
|
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
|
||||||
|
mTables[tableId][level][timeStepCount] =
|
||||||
|
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
|
||||||
}
|
}
|
||||||
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
|
|
||||||
const float probability = initialProbability
|
|
||||||
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
|
|
||||||
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
|
|
||||||
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
|
|
||||||
mTable[level][timeStepCount] =
|
|
||||||
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class DictionaryHeaderStructurePolicy;
|
class HeaderPolicy;
|
||||||
|
|
||||||
class ForgettingCurveUtils {
|
class ForgettingCurveUtils {
|
||||||
public:
|
public:
|
||||||
|
@ -35,12 +35,14 @@ class ForgettingCurveUtils {
|
||||||
|
|
||||||
static const HistoricalInfo createUpdatedHistoricalInfo(
|
static const HistoricalInfo createUpdatedHistoricalInfo(
|
||||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||||
const int timestamp);
|
const int timestamp, const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
static const HistoricalInfo createHistoricalInfoToSave(
|
static const HistoricalInfo createHistoricalInfoToSave(
|
||||||
const HistoricalInfo *const originalHistoricalInfo);
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
|
const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
static int decodeProbability(const HistoricalInfo *const historicalInfo);
|
static int decodeProbability(const HistoricalInfo *const historicalInfo,
|
||||||
|
const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
static int getProbability(const int encodedUnigramProbability,
|
static int getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability);
|
const int encodedBigramProbability);
|
||||||
|
@ -48,7 +50,7 @@ class ForgettingCurveUtils {
|
||||||
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
|
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
|
||||||
|
|
||||||
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
|
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
|
||||||
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
|
const int bigramCount, const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
||||||
|
@ -57,21 +59,23 @@ class ForgettingCurveUtils {
|
||||||
public:
|
public:
|
||||||
ProbabilityTable();
|
ProbabilityTable();
|
||||||
|
|
||||||
int getProbability(const int level, const int elapsedTimeStepCount) const {
|
int getProbability(const int tableId, const int level,
|
||||||
return mTable[level][elapsedTimeStepCount];
|
const int elapsedTimeStepCount) const {
|
||||||
|
return mTables[tableId][level][elapsedTimeStepCount];
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
|
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
|
||||||
|
|
||||||
std::vector<std::vector<int> > mTable;
|
static const int PROBABILITY_TABLE_COUNT;
|
||||||
|
|
||||||
|
std::vector<std::vector<std::vector<int> > > mTables;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int MAX_COMPUTED_PROBABILITY;
|
static const int MAX_COMPUTED_PROBABILITY;
|
||||||
static const int DECAY_INTERVAL_SECONDS;
|
static const int DECAY_INTERVAL_SECONDS;
|
||||||
|
|
||||||
static const int MAX_LEVEL;
|
static const int MAX_LEVEL;
|
||||||
static const int MAX_COUNT;
|
|
||||||
static const int MIN_VALID_LEVEL;
|
static const int MIN_VALID_LEVEL;
|
||||||
static const int TIME_STEP_DURATION_IN_SECONDS;
|
static const int TIME_STEP_DURATION_IN_SECONDS;
|
||||||
static const int MAX_ELAPSED_TIME_STEP_COUNT;
|
static const int MAX_ELAPSED_TIME_STEP_COUNT;
|
||||||
|
|
Loading…
Reference in a new issue