am 3e5f4b53: Merge "Check the elapsed time for decaying."

* commit '3e5f4b53b0b51288afdd921052974c6740a5aa6b':
  Check the elapsed time for decaying.
main
Keisuke Kuroyanagi 2013-10-04 00:18:25 -07:00 committed by Android Git Automerger
commit 5544b966e3
12 changed files with 113 additions and 59 deletions

View File

@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy {
virtual float getMultiWordCostMultiplier() const = 0; virtual float getMultiWordCostMultiplier() const = 0;
virtual int getLastDecayedTime() const = 0;
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue, virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
int outValueSize) const = 0; int outValueSize) const = 0;

View File

@ -360,13 +360,13 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
} }
bool DynamicBigramListPolicy::updateProbabilityForDecay( bool DynamicBigramListPolicy::updateProbabilityForDecay(
BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
int *const bigramEntryPos, bool *const outRemoved) const { int *const bigramEntryPos, bool *const outRemoved) const {
*outRemoved = false; *outRemoved = false;
if (mIsDecayingDict) { if (mIsDecayingDict) {
// Update bigram probability for decaying. // Update bigram probability for decaying.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
// Write new probability. // Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =

View File

@ -27,6 +27,7 @@
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy; class DictionaryShortcutsStructurePolicy;
/* /*
@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy;
*/ */
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public: public:
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy, const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const bool isDecayingDict) const bool isDecayingDict)
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {} : mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {} ~DynamicBigramListPolicy() {}
@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT; static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT; static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
const bool mIsDecayingDict; const bool mIsDecayingDict;
@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
// Follow bigram link and return the position of bigram target PtNode that is currently valid. // Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const; int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags, bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const; const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
}; };
} // namespace latinime } // namespace latinime

View File

@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -29,7 +30,8 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal(); bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) { if (node->isTerminal() && mIsDecayingDict) {
const int newProbability = const int newProbability =
ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
mHeaderPolicy);
int writingPos = node->getProbabilityFieldPos(); int writingPos = node->getProbabilityFieldPos();
// Update probability. // Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(

View File

@ -29,6 +29,8 @@
namespace latinime { namespace latinime {
class DictionaryHeaderStructurePolicy;
class DynamicPatriciaTrieGcEventListeners { class DynamicPatriciaTrieGcEventListeners {
public: public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public: public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
const DictionaryHeaderStructurePolicy *const headerPolicy,
DynamicPatriciaTrieWritingHelper *const writingHelper, DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
: mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict), : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@ -72,9 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS( DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
DynamicPatriciaTrieWritingHelper *const mWritingHelper; DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
const int mIsDecayingDict; const bool mIsDecayingDict;
std::vector<int> mValueStack; std::vector<int> mValueStack;
int mChildrenValue; int mChildrenValue;
int mValidUnigramCount; int mValidUnigramCount;
@ -85,7 +90,8 @@ class DynamicPatriciaTrieGcEventListeners {
class TraversePolicyToUpdateBigramProbability class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public: public:
TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) TraversePolicyToUpdateBigramProbability(
DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {} : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; } bool onAscend() { return true; }

View File

@ -42,7 +42,6 @@ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUER
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024; const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const { DicNodeVector *const childDicNodes) const {
@ -314,16 +313,16 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return; return;
} }
const bool runGCwithDecay = needsToDecay(); const bool needsToDecay = mHeaderPolicy.isDecayingDict()
DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer, && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
&mShortcutListPolicy, runGCwithDecay); false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
&mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay); &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
if (runGCwithDecay) {
mNeedsToDecayForTesting = false; mNeedsToDecayForTesting = false;
} }
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffer->isUpdatable()) { if (!mBuffer->isUpdatable()) {
@ -344,16 +343,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size. // Needs to reduce dictionary size.
return true; return true;
} else if (mHeaderPolicy.isDecayingDict()) { } else if (mHeaderPolicy.isDecayingDict()) {
if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) { return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
// Unigram count exceeds the limit. mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
return true;
} else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
// Bigram count exceeds the limit.
return true;
} else if (mindsBlockByGC && needsToDecay()) {
// Time to update probabilities for decaying.
return true;
}
} }
return false; return false;
} }
@ -369,9 +360,4 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
} }
} }
bool DynamicPatriciaTriePolicy::needsToDecay() const {
return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
|| mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
}
} // namespace latinime } // namespace latinime

View File

@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer), mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy, mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()), mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()), mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
@ -105,7 +105,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY; static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE; static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
const MmappedBuffer *const mBuffer; const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy; const HeaderPolicy mHeaderPolicy;
@ -115,8 +114,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mUnigramCount; int mUnigramCount;
int mBigramCount; int mBigramCount;
int mNeedsToDecayForTesting; int mNeedsToDecayForTesting;
bool needsToDecay() const;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
MAX_DICTIONARY_SIZE); MAX_DICTIONARY_SIZE);
int unigramCount = 0; int unigramCount = 0;
int bigramCount = 0; int bigramCount = 0;
if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) { if (mNeedsToDecay) {
ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
}
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
return; return;
} }
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
@ -481,14 +484,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
int *const outBigramCount) { int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
this, mBuffer, mNeedsToDecay); headerPolicy, this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false; return false;
@ -505,7 +508,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) { &traversePolicyToUpdateBigramProbability)) {
return false; return false;
} }
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams. // TODO: Remove more bigrams.
@ -524,7 +526,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instance for the GCed dictionary. // Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy, DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay); mNeedsToDecay);
// Create reading helper for the GCed dictionary. // Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,

View File

@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount); const int newNodeCodePointCount);
bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite, bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
int *const outUnigramCount, int *const outBigramCount); BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
int *const outBigramCount);
int getUpdatedProbability(const int originalProbability, const int newProbability); int getUpdatedProbability(const int originalProbability, const int newProbability);
}; };

View File

@ -15,10 +15,12 @@
*/ */
#include <cmath> #include <cmath>
#include <ctime>
#include <stdlib.h> #include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime { namespace latinime {
@ -35,8 +37,14 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected // Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66 hours. // duration of the decay is approximately 66 hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
// Stores the present value of time(0) as the value that peekCurrentTime() reports.
void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
    const time_t now = time(0);
    // mCurrentTime is an int, so make the narrowing from time_t explicit.
    mCurrentTime = static_cast<int>(now);
}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) { const int encodedBigramProbability) {
@ -76,19 +84,44 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
} }
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { /* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); const DictionaryHeaderStructurePolicy *const headerPolicy) {
const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
// TODO: Implement the decay in more proper way. // TODO: Implement the decay in more proper way.
for (int i = 0; i < decayIterationCount; ++i) {
const float currentRate = static_cast<float>(currentEncodedProbability) const float currentRate = static_cast<float>(currentEncodedProbability)
/ static_cast<float>(MAX_ENCODED_PROBABILITY); / static_cast<float>(MAX_ENCODED_PROBABILITY);
const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
+ (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); + (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
if (thresholdToDecay < randValue) { if (thresholdToDecay < randValue) {
return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
} else { 0);
}
}
return currentEncodedProbability; return currentEncodedProbability;
} }
// Decides whether the dictionary should be decayed now.
//
// Returns true as soon as unigramCount has reached MAX_UNIGRAM_COUNT or bigramCount has reached
// MAX_BIGRAM_COUNT; this check is applied regardless of mindsBlockByDecay.
// Otherwise, when mindsBlockByDecay is true the elapsed-time check is skipped and false is
// returned. When it is false, returns true only if more than DECAY_INTERVAL_SECONDS have passed
// since headerPolicy->getLastDecayedTime(), measured against time(0) at the moment of the call.
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
const int unigramCount, const int bigramCount,
const DictionaryHeaderStructurePolicy *const headerPolicy) {
// The size limits take precedence over every other consideration.
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
// Unigram count exceeds the limit.
return true;
} else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
// Bigram count exceeds the limit.
return true;
}
// Below the size limits, a caller that minds being blocked never triggers a time-based decay.
if (mindsBlockByDecay) {
return false;
}
// NOTE(review): the left-hand sum is evaluated in int before being compared with time(0) --
// confirm that getLastDecayedTime() can never be close enough to INT_MAX to overflow.
if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
// Time to decay.
return true;
}
return false;
} }
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) { /* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {

View File

@ -23,16 +23,32 @@
namespace latinime { namespace latinime {
class DictionaryHeaderStructurePolicy;
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to be introduced to each terminal PtNode and bigram entry. // required to be introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta. // TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils { class ForgettingCurveUtils {
public: public:
// Holds a time value that is captured on demand by setCurrentTime() and can then be read any
// number of times through peekCurrentTime() without consulting the clock again.
class TimeKeeper {
 public:
    // The stored time is 0 until setCurrentTime() is called for the first time.
    TimeKeeper() : mCurrentTime(0) {}

    // Captures the current time; defined out of line.
    void setCurrentTime();

    // Returns the most recently captured time without refreshing it.
    int peekCurrentTime() const {
        return mCurrentTime;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(TimeKeeper);

    int mCurrentTime;
};
static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC; static const int MAX_UNIGRAM_COUNT_AFTER_GC;
static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC; static const int MAX_BIGRAM_COUNT_AFTER_GC;
static TimeKeeper sTimeKeeper;
static int getProbability(const int encodedUnigramProbability, static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability); const int encodedBigramProbability);
@ -41,7 +57,11 @@ class ForgettingCurveUtils {
static int isValidEncodedProbability(const int encodedProbability); static int isValidEncodedProbability(const int encodedProbability);
static int getEncodedProbabilityToSave(const int encodedProbability); static int getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
@ -68,6 +88,7 @@ class ForgettingCurveUtils {
static const int MIN_VALID_ENCODED_PROBABILITY; static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP; static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY; static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
static const ProbabilityTable sProbabilityTable; static const ProbabilityTable sProbabilityTable;

View File

@ -50,8 +50,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
} }
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
// Entries having low probability would be suppressed once in 2 GCs. // Entries having low probability would be suppressed once in 3 GCs.
final int count = 2; final int count = 3;
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();