am 5544b966: am 3e5f4b53: Merge "Check the elapsed time for decaying."
* commit '5544b966e3d395fb670192ebe728029dd7bdce63': Check the elapsed time for decaying.
main
commit
449245c898
|
@ -37,6 +37,8 @@ class DictionaryHeaderStructurePolicy {
|
||||||
|
|
||||||
virtual float getMultiWordCostMultiplier() const = 0;
|
virtual float getMultiWordCostMultiplier() const = 0;
|
||||||
|
|
||||||
|
virtual int getLastDecayedTime() const = 0;
|
||||||
|
|
||||||
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
virtual void readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||||
int outValueSize) const = 0;
|
int outValueSize) const = 0;
|
||||||
|
|
||||||
|
|
|
@ -360,13 +360,13 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::updateProbabilityForDecay(
|
bool DynamicBigramListPolicy::updateProbabilityForDecay(
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
|
const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
|
||||||
int *const bigramEntryPos, bool *const outRemoved) const {
|
int *const bigramEntryPos, bool *const outRemoved) const {
|
||||||
*outRemoved = false;
|
*outRemoved = false;
|
||||||
if (mIsDecayingDict) {
|
if (mIsDecayingDict) {
|
||||||
// Update bigram probability for decaying.
|
// Update bigram probability for decaying.
|
||||||
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
|
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
|
||||||
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
||||||
// Write new probability.
|
// Write new probability.
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
|
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
|
class DictionaryHeaderStructurePolicy;
|
||||||
class DictionaryShortcutsStructurePolicy;
|
class DictionaryShortcutsStructurePolicy;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
*/
|
*/
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
|
DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
|
||||||
|
BufferWithExtendableBuffer *const buffer,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
||||||
const bool isDecayingDict)
|
const bool isDecayingDict)
|
||||||
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
|
: mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
|
||||||
|
mIsDecayingDict(isDecayingDict) {}
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
|
||||||
|
@ -74,6 +77,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
|
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
|
||||||
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
|
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
|
||||||
|
|
||||||
|
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
||||||
const bool mIsDecayingDict;
|
const bool mIsDecayingDict;
|
||||||
|
@ -81,7 +85,7 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
||||||
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
||||||
|
|
||||||
bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
|
bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
|
||||||
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
|
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
|
|
||||||
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -29,7 +30,8 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
bool isUselessPtNode = !node->isTerminal();
|
bool isUselessPtNode = !node->isTerminal();
|
||||||
if (node->isTerminal() && mIsDecayingDict) {
|
if (node->isTerminal() && mIsDecayingDict) {
|
||||||
const int newProbability =
|
const int newProbability =
|
||||||
ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
|
ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
|
||||||
|
mHeaderPolicy);
|
||||||
int writingPos = node->getProbabilityFieldPos();
|
int writingPos = node->getProbabilityFieldPos();
|
||||||
// Update probability.
|
// Update probability.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
||||||
|
|
|
@ -29,6 +29,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class DictionaryHeaderStructurePolicy;
|
||||||
|
|
||||||
class DynamicPatriciaTrieGcEventListeners {
|
class DynamicPatriciaTrieGcEventListeners {
|
||||||
public:
|
public:
|
||||||
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
|
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
|
||||||
|
@ -38,10 +40,12 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
|
const DictionaryHeaderStructurePolicy *const headerPolicy,
|
||||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||||
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
|
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
|
||||||
: mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
|
: mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
|
||||||
mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
|
mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
|
||||||
|
mValidUnigramCount(0) {}
|
||||||
|
|
||||||
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
||||||
|
|
||||||
|
@ -72,9 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(
|
DISALLOW_IMPLICIT_CONSTRUCTORS(
|
||||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
|
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
|
||||||
|
|
||||||
|
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
||||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const int mIsDecayingDict;
|
const bool mIsDecayingDict;
|
||||||
std::vector<int> mValueStack;
|
std::vector<int> mValueStack;
|
||||||
int mChildrenValue;
|
int mChildrenValue;
|
||||||
int mValidUnigramCount;
|
int mValidUnigramCount;
|
||||||
|
@ -85,7 +90,8 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
class TraversePolicyToUpdateBigramProbability
|
class TraversePolicyToUpdateBigramProbability
|
||||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
|
TraversePolicyToUpdateBigramProbability(
|
||||||
|
DynamicBigramListPolicy *const bigramPolicy)
|
||||||
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
|
: mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
|
||||||
|
|
||||||
bool onAscend() { return true; }
|
bool onAscend() { return true; }
|
||||||
|
|
|
@ -42,7 +42,6 @@ const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUER
|
||||||
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
||||||
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||||
const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
|
@ -314,15 +313,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const bool runGCwithDecay = needsToDecay();
|
const bool needsToDecay = mHeaderPolicy.isDecayingDict()
|
||||||
DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
|
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||||
&mShortcutListPolicy, runGCwithDecay);
|
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
||||||
|
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
||||||
|
&mShortcutListPolicy, needsToDecay);
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
|
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
||||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||||
if (runGCwithDecay) {
|
mNeedsToDecayForTesting = false;
|
||||||
mNeedsToDecayForTesting = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
|
@ -344,16 +343,8 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
// Needs to reduce dictionary size.
|
// Needs to reduce dictionary size.
|
||||||
return true;
|
return true;
|
||||||
} else if (mHeaderPolicy.isDecayingDict()) {
|
} else if (mHeaderPolicy.isDecayingDict()) {
|
||||||
if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||||
// Unigram count exceeds the limit.
|
mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
|
||||||
return true;
|
|
||||||
} else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
|
|
||||||
// Bigram count exceeds the limit.
|
|
||||||
return true;
|
|
||||||
} else if (mindsBlockByGC && needsToDecay()) {
|
|
||||||
// Time to update probabilities for decaying.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -369,9 +360,4 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::needsToDecay() const {
|
|
||||||
return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
|
|
||||||
|| mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -37,7 +37,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||||
|
@ -105,7 +105,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
||||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
|
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
|
@ -115,8 +114,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
int mNeedsToDecayForTesting;
|
int mNeedsToDecayForTesting;
|
||||||
|
|
||||||
bool needsToDecay() const;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -165,7 +165,10 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
MAX_DICTIONARY_SIZE);
|
MAX_DICTIONARY_SIZE);
|
||||||
int unigramCount = 0;
|
int unigramCount = 0;
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) {
|
if (mNeedsToDecay) {
|
||||||
|
ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
|
||||||
|
}
|
||||||
|
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||||
|
@ -481,14 +484,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
int *const outBigramCount) {
|
int *const outUnigramCount, int *const outBigramCount) {
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
this, mBuffer, mNeedsToDecay);
|
headerPolicy, this, mBuffer, mNeedsToDecay);
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -505,7 +508,6 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
&traversePolicyToUpdateBigramProbability)) {
|
&traversePolicyToUpdateBigramProbability)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
||||||
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||||
// TODO: Remove more bigrams.
|
// TODO: Remove more bigrams.
|
||||||
|
@ -524,7 +526,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
|
|
||||||
// Create policy instance for the GCed dictionary.
|
// Create policy instance for the GCed dictionary.
|
||||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||||
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
|
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
|
||||||
mNeedsToDecay);
|
mNeedsToDecay);
|
||||||
// Create reading helper for the GCed dictionary.
|
// Create reading helper for the GCed dictionary.
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
||||||
|
|
|
@ -128,8 +128,9 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||||
const int newNodeCodePointCount);
|
const int newNodeCodePointCount);
|
||||||
|
|
||||||
bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
|
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
||||||
int *const outUnigramCount, int *const outBigramCount);
|
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
||||||
|
int *const outBigramCount);
|
||||||
|
|
||||||
int getUpdatedProbability(const int originalProbability, const int newProbability);
|
int getUpdatedProbability(const int originalProbability, const int newProbability);
|
||||||
};
|
};
|
||||||
|
|
|
@ -15,10 +15,12 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <ctime>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -35,8 +37,14 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
|
||||||
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
|
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
|
||||||
// duration of the decay is approximately 66hours.
|
// duration of the decay is approximately 66hours.
|
||||||
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
||||||
|
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
||||||
|
|
||||||
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||||
|
ForgettingCurveUtils::TimeKeeper ForgettingCurveUtils::sTimeKeeper;
|
||||||
|
|
||||||
|
void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
|
||||||
|
mCurrentTime = time(0);
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability) {
|
const int encodedBigramProbability) {
|
||||||
|
@ -76,19 +84,44 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
|
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
|
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
|
||||||
const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
|
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||||
|
const int elapsedTime = sTimeKeeper.peekCurrentTime() - headerPolicy->getLastDecayedTime();
|
||||||
|
const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
|
||||||
|
int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
|
||||||
// TODO: Implement the decay in more proper way.
|
// TODO: Implement the decay in more proper way.
|
||||||
const float currentRate = static_cast<float>(currentEncodedProbability)
|
for (int i = 0; i < decayIterationCount; ++i) {
|
||||||
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
|
const float currentRate = static_cast<float>(currentEncodedProbability)
|
||||||
const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
|
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
|
||||||
+ (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
|
const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
|
||||||
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
|
+ (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
|
||||||
if (thresholdToDecay < randValue) {
|
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
|
||||||
return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
|
if (thresholdToDecay < randValue) {
|
||||||
} else {
|
currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
|
||||||
return currentEncodedProbability;
|
0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return currentEncodedProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||||
|
const int unigramCount, const int bigramCount,
|
||||||
|
const DictionaryHeaderStructurePolicy *const headerPolicy) {
|
||||||
|
if (unigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
||||||
|
// Unigram count exceeds the limit.
|
||||||
|
return true;
|
||||||
|
} else if (bigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
|
||||||
|
// Bigram count exceeds the limit.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (mindsBlockByDecay) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (headerPolicy->getLastDecayedTime() + DECAY_INTERVAL_SECONDS < time(0)) {
|
||||||
|
// Time to decay.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
|
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
|
||||||
|
|
|
@ -23,16 +23,32 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class DictionaryHeaderStructurePolicy;
|
||||||
|
|
||||||
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
|
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
|
||||||
// required to introduced to each terminal PtNode and bigram entry.
|
// required to introduced to each terminal PtNode and bigram entry.
|
||||||
// TODO: Quit using bigram probability to indicate the delta.
|
// TODO: Quit using bigram probability to indicate the delta.
|
||||||
class ForgettingCurveUtils {
|
class ForgettingCurveUtils {
|
||||||
public:
|
public:
|
||||||
|
class TimeKeeper {
|
||||||
|
public:
|
||||||
|
TimeKeeper() : mCurrentTime(0) {}
|
||||||
|
void setCurrentTime();
|
||||||
|
int peekCurrentTime() const { return mCurrentTime; };
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(TimeKeeper);
|
||||||
|
|
||||||
|
int mCurrentTime;
|
||||||
|
};
|
||||||
|
|
||||||
static const int MAX_UNIGRAM_COUNT;
|
static const int MAX_UNIGRAM_COUNT;
|
||||||
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
|
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
|
||||||
static const int MAX_BIGRAM_COUNT;
|
static const int MAX_BIGRAM_COUNT;
|
||||||
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
||||||
|
|
||||||
|
static TimeKeeper sTimeKeeper;
|
||||||
|
|
||||||
static int getProbability(const int encodedUnigramProbability,
|
static int getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability);
|
const int encodedBigramProbability);
|
||||||
|
|
||||||
|
@ -41,7 +57,11 @@ class ForgettingCurveUtils {
|
||||||
|
|
||||||
static int isValidEncodedProbability(const int encodedProbability);
|
static int isValidEncodedProbability(const int encodedProbability);
|
||||||
|
|
||||||
static int getEncodedProbabilityToSave(const int encodedProbability);
|
static int getEncodedProbabilityToSave(const int encodedProbability,
|
||||||
|
const DictionaryHeaderStructurePolicy *const headerPolicy);
|
||||||
|
|
||||||
|
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
|
||||||
|
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
||||||
|
@ -68,6 +88,7 @@ class ForgettingCurveUtils {
|
||||||
static const int MIN_VALID_ENCODED_PROBABILITY;
|
static const int MIN_VALID_ENCODED_PROBABILITY;
|
||||||
static const int ENCODED_PROBABILITY_STEP;
|
static const int ENCODED_PROBABILITY_STEP;
|
||||||
static const float MIN_PROBABILITY_TO_DECAY;
|
static const float MIN_PROBABILITY_TO_DECAY;
|
||||||
|
static const int DECAY_INTERVAL_SECONDS;
|
||||||
|
|
||||||
static const ProbabilityTable sProbabilityTable;
|
static const ProbabilityTable sProbabilityTable;
|
||||||
|
|
||||||
|
|
|
@ -50,8 +50,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
||||||
// Entries having low probability would be suppressed once in 2 GCs.
|
// Entries having low probability would be suppressed once in 3 GCs.
|
||||||
final int count = 2;
|
final int count = 3;
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
|
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
|
|
Loading…
Reference in New Issue