Merge "Check header attributes for forgetting curve."

This commit is contained in:
Keisuke Kuroyanagi 2014-02-26 08:58:48 +00:00 committed by Android (Google) Code Review
commit 94080a37e8
13 changed files with 96 additions and 61 deletions

View file

@ -44,8 +44,6 @@ class DictionaryHeaderStructurePolicy {
virtual float getMultiWordCostMultiplier() const = 0; virtual float getMultiWordCostMultiplier() const = 0;
virtual int getLastDecayedTime() const = 0;
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0; int outValueSize) const = 0;

View file

@ -37,7 +37,8 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
if (outProbability) { if (outProbability) {
if (bigramEntry.hasHistoricalInfo()) { if (bigramEntry.hasHistoricalInfo()) {
*outProbability = *outProbability =
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()); ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
mHeaderPolicy);
} else { } else {
*outProbability = bigramEntry.getProbability(); *outProbability = bigramEntry.getProbability();
} }
@ -160,7 +161,7 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
} }
} else if (bigramEntry.hasHistoricalInfo()) { } else if (bigramEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo()); bigramEntry.getHistoricalInfo(), mHeaderPolicy);
if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) { if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
const BigramEntry updatedBigramEntry = const BigramEntry updatedBigramEntry =
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
@ -230,7 +231,8 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
if (mHeaderPolicy->hasHistoricalInfoOfWords()) { if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp); originalBigramEntry->getHistoricalInfo(), newProbability, timestamp,
mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else { } else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);

View file

@ -35,6 +35,8 @@ const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 4;
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 0;
// Used for logging. Question mark is used to indicate that the key is not found. // Used for logging. Question mark is used to indicate that the key is not found.
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,

View file

@ -159,6 +159,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return &mAttributeMap; return &mAttributeMap;
} }
// Returns the occurrence-count threshold that
// ForgettingCurveUtils::createUpdatedHistoricalInfo compares the updated count against
// before it considers incrementing an entry's level.
// This currently always yields the compiled-in default; nothing is read from mAttributeMap.
AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
return DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
}
// Returns the id of the forgetting-curve probability table to use.
// ForgettingCurveUtils::decodeProbability passes this value as the table index when it
// looks up a probability.
// This currently always yields the compiled-in default; nothing is read from mAttributeMap.
AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
return DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
}
void readHeaderValueOrQuestionMark(const char *const key, void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const; int *outValue, int outValueSize) const;
@ -185,6 +193,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const LOCALE_KEY; static const char *const LOCALE_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
const FormatUtils::FORMAT_VERSION mDictFormatVersion; const FormatUtils::FORMAT_VERSION mDictFormatVersion;
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;

View file

@ -65,7 +65,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
mProbabilityDictContent->getProbabilityEntry(terminalId); mProbabilityDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) { if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability( probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo()); probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
} else { } else {
probability = probabilityEntry.getProbability(); probability = probabilityEntry.getProbability();
} }

View file

@ -26,6 +26,7 @@
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class HeaderPolicy;
class ProbabilityDictContent; class ProbabilityDictContent;
/* /*
@ -35,8 +36,10 @@ class ProbabilityDictContent;
class Ver4PatriciaTrieNodeReader : public PtNodeReader { class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public: public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const ProbabilityDictContent *const probabilityDictContent) const ProbabilityDictContent *const probabilityDictContent,
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {} const HeaderPolicy *const headerPolicy)
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {} ~Ver4PatriciaTrieNodeReader() {}
@ -50,6 +53,7 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
const ProbabilityDictContent *const mProbabilityDictContent; const ProbabilityDictContent *const mProbabilityDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos) const; const int siblingNodePos) const;

View file

@ -159,7 +159,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) { if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo()); originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
@ -382,10 +382,11 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const { const int timestamp) const {
// TODO: Consolidate historical info and probability. // TODO: Consolidate historical info and probability.
if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) { if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp); originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo); &updatedHistoricalInfo);
} else { } else {

View file

@ -28,6 +28,7 @@
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class HeaderPolicy;
class Ver4BigramListPolicy; class Ver4BigramListPolicy;
class Ver4DictBuffers; class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader; class Ver4PatriciaTrieNodeReader;
@ -40,10 +41,11 @@ class Ver4ShortcutListPolicy;
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public: public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const PtNodeReader *const ptNodeReader, Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
const PtNodeReader *const ptNodeReader,
const PtNodeArrayReader *const ptNodeArrayReader, const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
mShortcutPolicy(shortcutPolicy) {} mShortcutPolicy(shortcutPolicy) {}
@ -116,6 +118,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
BufferWithExtendableBuffer *const mTrieBuffer; BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers; Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
DynamicPtReadingHelper mReadingHelper; DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy; Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy;

View file

@ -382,7 +382,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const code
bigramWord1CodePoints + codePointCount); bigramWord1CodePoints + codePointCount);
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
const int probability = bigramEntry.hasHistoricalInfo() ? const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) : ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability(); bigramEntry.getProbability();
bigrams.push_back(WordProperty::BigramProperty(&word1, probability, bigrams.push_back(WordProperty::BigramProperty(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(), historicalInfo->getTimeStamp(), historicalInfo->getLevel(),

View file

@ -47,10 +47,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy), mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(), mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mPtNodeArrayReader, mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mBigramPolicy, &mShortcutPolicy), &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()), mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()), mUnigramCount(mHeaderPolicy->getUnigramCount()),

View file

@ -74,14 +74,15 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) { int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent()); mBuffers->getProbabilityDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy); mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()); mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
&shortcutPolicy);
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
@ -126,7 +127,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, &shortcutPolicy); buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
&shortcutPolicy);
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@ -137,14 +139,14 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instances for the GCed dictionary. // Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent()); buffersToWrite->getProbabilityDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable()); buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
&newShortcutPolicy); &newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes. // Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap; TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
@ -202,8 +204,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo()) : ForgettingCurveUtils::decodeProbability(
probabilityEntry.getProbability(); probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
probabilityEntry.getProbability();
priorityQueue.push(DictProbability(terminalPos, probability, priorityQueue.push(DictProbability(terminalPos, probability,
probabilityEntry.getHistoricalInfo()->getTimeStamp())); probabilityEntry.getHistoricalInfo()->getTimeStamp()));
} }
@ -245,8 +248,9 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
continue; continue;
} }
const int probability = bigramEntry.hasHistoricalInfo() ? const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo()) : ForgettingCurveUtils::decodeProbability(
bigramEntry.getProbability(); bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
bigramEntry.getProbability();
priorityQueue.push(DictProbability(entryPos, probability, priorityQueue.push(DictProbability(entryPos, probability,
bigramEntry.getHistoricalInfo()->getTimeStamp())); bigramEntry.getHistoricalInfo()->getTimeStamp()));
} }

View file

@ -19,7 +19,7 @@
#include <cmath> #include <cmath>
#include <stdlib.h> #include <stdlib.h>
#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
#include "utils/time_keeper.h" #include "utils/time_keeper.h"
@ -34,7 +34,6 @@ const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3; const int ForgettingCurveUtils::MAX_LEVEL = 3;
const int ForgettingCurveUtils::MAX_COUNT = 3;
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1; const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60; const int ForgettingCurveUtils::TIME_STEP_DURATION_IN_SECONDS = 6 * 60 * 60;
const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15; const int ForgettingCurveUtils::MAX_ELAPSED_TIME_STEP_COUNT = 15;
@ -45,7 +44,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
// TODO: Revise the logic to decide the initial probability depending on the given probability. // TODO: Revise the logic to decide the initial probability depending on the given probability.
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo( /* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const HistoricalInfo *const originalHistoricalInfo,
const int newProbability, const int timestamp) { const int newProbability, const int timestamp, const HeaderPolicy *const headerPolicy) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */); return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
} else if (!originalHistoricalInfo->isValid()) { } else if (!originalHistoricalInfo->isValid()) {
@ -53,7 +52,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */); return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
} else { } else {
const int updatedCount = originalHistoricalInfo->getCount() + 1; const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount > MAX_COUNT) { if (updatedCount >= headerPolicy->getForgettingCurveOccurrencesToLevelUp()) {
// The count exceeds the max value the level can be incremented. // The count exceeds the max value the level can be incremented.
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) { if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
// The level is already max. // The level is already max.
@ -71,9 +70,10 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
/* static */ int ForgettingCurveUtils::decodeProbability( /* static */ int ForgettingCurveUtils::decodeProbability(
const HistoricalInfo *const historicalInfo) { const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp()); const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
return sProbabilityTable.getProbability(historicalInfo->getLevel(), return sProbabilityTable.getProbability(
headerPolicy->getForgettingCurveProbabilityValuesTableId(), historicalInfo->getLevel(),
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT)); min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
} }
@ -95,7 +95,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave( /* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo) { const HistoricalInfo *const originalHistoricalInfo,
const HeaderPolicy *const headerPolicy) {
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
return HistoricalInfo(); return HistoricalInfo();
} }
@ -115,8 +116,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay, /* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
const int unigramCount, const int bigramCount, const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
const DictionaryHeaderStructurePolicy *const headerPolicy) {
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
// Unigram count exceeds the limit. // Unigram count exceeds the limit.
return true; return true;
@ -148,24 +148,30 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS; return (TimeKeeper::peekCurrentTime() - timestamp) / TIME_STEP_DURATION_IN_SECONDS;
} }
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() { const int ForgettingCurveUtils::ProbabilityTable::PROBABILITY_TABLE_COUNT = 1;
mTable.resize(MAX_LEVEL + 1);
for (int level = 0; level <= MAX_LEVEL; ++level) { ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTables() {
mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1); mTables.resize(PROBABILITY_TABLE_COUNT);
const float initialProbability = for (int tableId = 0; tableId < PROBABILITY_TABLE_COUNT; ++tableId) {
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level))); mTables[tableId].resize(MAX_LEVEL + 1);
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) { for (int level = 0; level <= MAX_LEVEL; ++level) {
if (level == 0) { mTables[tableId][level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
mTable[level][timeStepCount] = NOT_A_PROBABILITY; const float initialProbability =
continue; static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT;
++timeStepCount) {
if (level == 0) {
mTables[tableId][level][timeStepCount] = NOT_A_PROBABILITY;
continue;
}
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
const float probability = initialProbability
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
mTables[tableId][level][timeStepCount] =
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
} }
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
const float probability = initialProbability
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
mTable[level][timeStepCount] =
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
} }
} }
} }

View file

@ -24,7 +24,7 @@
namespace latinime { namespace latinime {
class DictionaryHeaderStructurePolicy; class HeaderPolicy;
class ForgettingCurveUtils { class ForgettingCurveUtils {
public: public:
@ -35,12 +35,14 @@ class ForgettingCurveUtils {
static const HistoricalInfo createUpdatedHistoricalInfo( static const HistoricalInfo createUpdatedHistoricalInfo(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability, const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
const int timestamp); const int timestamp, const HeaderPolicy *const headerPolicy);
static const HistoricalInfo createHistoricalInfoToSave( static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo); const HistoricalInfo *const originalHistoricalInfo,
const HeaderPolicy *const headerPolicy);
static int decodeProbability(const HistoricalInfo *const historicalInfo); static int decodeProbability(const HistoricalInfo *const historicalInfo,
const HeaderPolicy *const headerPolicy);
static int getProbability(const int encodedUnigramProbability, static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability); const int encodedBigramProbability);
@ -48,7 +50,7 @@ class ForgettingCurveUtils {
static bool needsToKeep(const HistoricalInfo *const historicalInfo); static bool needsToKeep(const HistoricalInfo *const historicalInfo);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy); const int bigramCount, const HeaderPolicy *const headerPolicy);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
@ -57,21 +59,23 @@ class ForgettingCurveUtils {
public: public:
ProbabilityTable(); ProbabilityTable();
int getProbability(const int level, const int elapsedTimeStepCount) const { int getProbability(const int tableId, const int level,
return mTable[level][elapsedTimeStepCount]; const int elapsedTimeStepCount) const {
return mTables[tableId][level][elapsedTimeStepCount];
} }
private: private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable); DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
std::vector<std::vector<int> > mTable; static const int PROBABILITY_TABLE_COUNT;
std::vector<std::vector<std::vector<int> > > mTables;
}; };
static const int MAX_COMPUTED_PROBABILITY; static const int MAX_COMPUTED_PROBABILITY;
static const int DECAY_INTERVAL_SECONDS; static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL; static const int MAX_LEVEL;
static const int MAX_COUNT;
static const int MIN_VALID_LEVEL; static const int MIN_VALID_LEVEL;
static const int TIME_STEP_DURATION_IN_SECONDS; static const int TIME_STEP_DURATION_IN_SECONDS;
static const int MAX_ELAPSED_TIME_STEP_COUNT; static const int MAX_ELAPSED_TIME_STEP_COUNT;