Start using historical info for decaying.

Bug: 11073222

Change-Id: Iad5f42d7268939df35589c4900bf6f4425adaa3f
This commit is contained in:
Keisuke Kuroyanagi 2013-12-10 17:53:06 +09:00
parent 26266bd53b
commit 2fdb62a6a2
20 changed files with 189 additions and 235 deletions

View file

@ -163,7 +163,8 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) {
if (bigramsIt.getBigramPos() == nextWordPos
&& bigramsIt.getProbability() != NOT_A_PROBABILITY) {
return mDictionaryStructurePolicy->getProbability(
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
bigramsIt.getProbability());

View file

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -34,7 +35,12 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
*outProbability = bigramEntry.getProbability();
if (bigramEntry.hasHistoricalInfo()) {
*outProbability =
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
} else {
*outProbability = bigramEntry.getProbability();
}
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
@ -152,17 +158,12 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
} else if (mNeedsToDecayWhenUpdating) {
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
bigramEntry.getProbability(), mHeaderPolicy);
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo());
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
} else if (bigramEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo());
if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
const BigramEntry updatedBigramEntry =
bigramEntry.updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&historicalInfo);
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
@ -225,14 +226,12 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability);
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
}

View file

@ -24,18 +24,17 @@
namespace latinime {
class BigramDictContent;
class DictionaryHeaderStructurePolicy;
class HeaderPolicy;
class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable,
const DictionaryHeaderStructurePolicy *const headerPolicy,
const bool needsToDecayWhenUpdating)
const HeaderPolicy *const headerPolicy)
: mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable),
mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
mHeaderPolicy(headerPolicy) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
@ -64,8 +63,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
const bool mNeedsToDecayWhenUpdating;
const HeaderPolicy *const mHeaderPolicy;
};
} // namespace latinime
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */

View file

@ -93,6 +93,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
}
}
AK_FORCE_INLINE bool isValid() const {
// Decaying dictionary must have historical information.
if (!mIsDecayingDict) {
return true;
}
if (mHasHistoricalInfoOfWords) {
return true;
} else {
return false;
}
}
AK_FORCE_INLINE int getSize() const {
return mSize;
}
@ -137,7 +149,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize;
}
AK_FORCE_INLINE bool hasHistricalInfoOfWords() const {
AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
return mHasHistoricalInfoOfWords;
}

View file

@ -20,7 +20,6 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@ -30,8 +29,7 @@ bool DynamicPatriciaTrieGcEventListeners
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !ptNodeParams->isTerminal();
// TODO: Quit checking mNeedsToDecayWhenUpdating.
if (ptNodeParams->isTerminal() && mNeedsToDecayWhenUpdating) {
if (ptNodeParams->isTerminal()) {
bool needsToKeepPtNode = true;
if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
&needsToKeepPtNode)) {

View file

@ -27,8 +27,6 @@
namespace latinime {
class DictionaryHeaderStructurePolicy;
class PtNodeWriter;
class PtNodeParams;
// TODO: Move to pt_common.
@ -41,12 +39,9 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
const DictionaryHeaderStructurePolicy *const headerPolicy,
PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer,
const bool needsToDecayWhenUpdating)
: mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {}
PtNodeWriter *const ptNodeWriter)
: mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@ -77,10 +72,7 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBuffer;
const bool mNeedsToDecayWhenUpdating;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;

View file

@ -73,6 +73,10 @@ class BigramEntry {
return mProbability;
}
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
const HistoricalInfo *getHistoricalInfo() const {
return &mHistoricalInfo;
}

View file

@ -51,6 +51,10 @@ class ProbabilityEntry {
return ProbabilityEntry(mFlags, mProbability, historicalInfo);
}
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
int getFlags() const {
return mFlags;
}

View file

@ -45,9 +45,9 @@ class Ver4DictBuffers {
}
AK_FORCE_INLINE bool isValid() const {
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
&& mShortcutDictContent.isValid();
return mDictBuffer.get() != 0 && mHeaderPolicy.isValid()
&& mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
&& mBigramDictContent.isValid() && mShortcutDictContent.isValid();
}
AK_FORCE_INLINE bool isNearSizeLimit() const {
@ -131,9 +131,10 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()),
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(),
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable),
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable),
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(), isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
@ -142,8 +143,8 @@ class Ver4DictBuffers {
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(),
mProbabilityDictContent(headerPolicy->hasHistricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistricalInfoOfWords()), mShortcutDictContent(),
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer;

View file

@ -22,6 +22,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@ -62,7 +63,12 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
const ProbabilityEntry probabilityEntry =
mProbabilityDictContent->getProbabilityEntry(terminalId);
probability = probabilityEntry.getProbability();
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo());
} else {
probability = probabilityEntry.getProbability();
}
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {

View file

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
@ -147,27 +148,21 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
return false;
}
if (mBuffers->getHeaderPolicy()->isDecayingDict()) {
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
// TODO: Remove.
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability)
.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
return false;
}
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo).
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo);
if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
@ -380,14 +375,13 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalProbabilityEntry->getProbability(), newProbability);
// TODO: Consolidate historical info and probability.
if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo);
} else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
}

View file

@ -40,12 +40,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy,
const bool needsToDecayWhenUpdating)
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -120,7 +118,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
};
} // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -34,8 +34,6 @@ const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT =
"SET_CURRENT_TIME_FOR_TESTING:%d";
const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@ -62,8 +60,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
!= NOT_A_PROBABILITY;
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
@ -262,11 +259,7 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
const bool needsToDecay = mHeaderPolicy->isDecayingDict()
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, mHeaderPolicy));
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, needsToDecay);
mNeedsToDecayForTesting = false;
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@ -286,8 +279,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
mindsBlockByGC, mUnigramCount, mBigramCount, mHeaderPolicy);
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
mHeaderPolicy);
}
return false;
}
@ -308,8 +301,6 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
mNeedsToDecayForTesting = true;
} else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) {
TimeKeeper::startTestModeWithForceCurrentTime(timestamp);
} else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) {

View file

@ -41,17 +41,16 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy,
mHeaderPolicy->isDecayingDict()),
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy, mHeaderPolicy->isDecayingDict()),
&mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
mBigramCount(mHeaderPolicy->getBigramCount()), mNeedsToDecayForTesting(false) {};
mBigramCount(mHeaderPolicy->getBigramCount()) {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
@ -117,7 +116,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const BIGRAM_COUNT_QUERY;
static const char *const MAX_UNIGRAM_COUNT_QUERY;
static const char *const MAX_BIGRAM_COUNT_QUERY;
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT;
static const char *const GET_CURRENT_TIME_QUERY;
static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY;
@ -137,7 +135,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
bool mNeedsToDecayForTesting;
};
} // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H

View file

@ -53,20 +53,20 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePa
}
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const trieFilePath, const bool needsToDecay) {
const char *const trieFilePath) {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
int unigramCount = 0;
int bigramCount = 0;
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount,
needsToDecay)) {
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
return;
}
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
needsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
true /* updatesLastDecayedTime */, unigramCount, bigramCount,
0 /* extendedRegionSize */)) {
return;
}
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
@ -77,30 +77,29 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount, const bool needsToDecay) {
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay);
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(),
needsToDecay);
&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
if (headerPolicy->isDecayingDict()
&& traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
@ -111,8 +110,9 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) {
return false;
}
if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
if (headerPolicy->isDecayingDict()
&& traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
@ -120,8 +120,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@ -134,13 +133,11 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy,
false /* needsToDecay */);
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
false /* needsToDecayWhenUpdating */);
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(

View file

@ -36,7 +36,7 @@ class Ver4PatriciaTrieWritingHelper {
const int bigramCount) const;
void writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const trieFilePath, const bool needsToDecay);
const char *const trieFilePath);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
@ -66,7 +66,7 @@ class Ver4PatriciaTrieWritingHelper {
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount, const bool needsToDecay);
int *const outBigramCount);
Ver4DictBuffers *const mBuffers;
};

View file

@ -31,12 +31,6 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
@ -53,9 +47,9 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
const int newProbability, const int timestamp) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
} else if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
} else if (!originalHistoricalInfo->isValid()) {
// Initial information.
return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */);
return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
} else {
const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount > MAX_COUNT) {
@ -75,85 +69,46 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
/* static */ int ForgettingCurveUtils::decodeProbability(
const HistoricalInfo *const historicalInfo) {
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
return sProbabilityTable.getProbability(historicalInfo->getLevel(),
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
}
/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
const int bigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (encodedBigramProbability == NOT_A_PROBABILITY) {
return backoff(decodeProbability(encodedUnigramProbability));
} else if (bigramProbability == NOT_A_PROBABILITY) {
return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY);
} else {
const int unigramProbability = decodeProbability(encodedUnigramProbability);
const int bigramProbability = decodeProbability(encodedBigramProbability);
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
}
}
// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
// (i.e. unigram probability + bigram probability delta).
/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
const int originalEncodedProbability, const int newProbability) {
if (originalEncodedProbability == NOT_A_PROBABILITY) {
// The bigram relation is not in this dictionary.
if (newProbability == NOT_A_PROBABILITY) {
// The bigram target is not in other dictionaries.
return 0;
} else {
return MIN_VALID_ENCODED_PROBABILITY;
}
} else {
if (newProbability != NOT_A_PROBABILITY
&& originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return MIN_VALID_ENCODED_PROBABILITY;
}
return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
}
}
/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
return historicalInfo->getLevel() > 0
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp())
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
}
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy) {
const int elapsedTime = TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
const int decayIterationCount = max(elapsedTime / DECAY_INTERVAL_SECONDS, 1);
int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
// TODO: Implement the decay in more proper way.
for (int i = 0; i < decayIterationCount; ++i) {
const float currentRate = static_cast<float>(currentEncodedProbability)
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
if (thresholdToDecay < randValue) {
currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
0);
}
}
return currentEncodedProbability;
}
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo) {
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
return HistoricalInfo();
}
const int elapsedTimeStep = getElapsedTimeStepCount(originalHistoricalInfo->getTimeStamp());
if (elapsedTimeStep < MAX_ELAPSED_TIME_STEP_COUNT) {
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
// No need to update historical info.
return *originalHistoricalInfo;
}
// Level down.
const int maxLevelDownAmonut = elapsedTimeStep / MAX_ELAPSED_TIME_STEP_COUNT;
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
const int adjustedTimestamp = originalHistoricalInfo->getTimeStamp() +
levelDownAmount * MAX_ELAPSED_TIME_STEP_COUNT * TIME_STEP_DURATION_IN_SECONDS;
levelDownAmount * (MAX_ELAPSED_TIME_STEP_COUNT + 1) * TIME_STEP_DURATION_IN_SECONDS;
return HistoricalInfo(adjustedTimestamp,
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
}
@ -179,14 +134,6 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return false;
}
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return NOT_A_PROBABILITY;
} else {
return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
}
}
// See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
@ -201,14 +148,24 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
}
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
// Table entry is as follows:
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
// Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
mTable.resize(MAX_ENCODED_PROBABILITY + 1);
for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
mTable.resize(MAX_LEVEL + 1);
for (int level = 0; level <= MAX_LEVEL; ++level) {
mTable[level].resize(MAX_ELAPSED_TIME_STEP_COUNT + 1);
const float initialProbability =
static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
for (int timeStepCount = 0; timeStepCount <= MAX_ELAPSED_TIME_STEP_COUNT; ++timeStepCount) {
if (level == 0) {
mTable[level][timeStepCount] = NOT_A_PROBABILITY;
continue;
}
const int elapsedTime = timeStepCount * TIME_STEP_DURATION_IN_SECONDS;
const float probability = initialProbability
* powf(2.0f, -1.0f * static_cast<float>(elapsedTime)
/ static_cast<float>(TIME_STEP_DURATION_IN_SECONDS
* (MAX_ELAPSED_TIME_STEP_COUNT + 1)));
mTable[level][timeStepCount] =
min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
}
}
}

View file

@ -26,8 +26,6 @@ namespace latinime {
class DictionaryHeaderStructurePolicy;
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils {
public:
@ -43,22 +41,13 @@ class ForgettingCurveUtils {
static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo);
static int decodeProbability(const HistoricalInfo *const historicalInfo);
static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability);
// TODO: Remove.
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability);
// TODO: Remove.
static int isValidEncodedProbability(const int encodedProbability);
static bool needsToKeep(const HistoricalInfo *const historicalInfo);
// TODO: Remove.
static int getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
@ -69,24 +58,17 @@ class ForgettingCurveUtils {
public:
ProbabilityTable();
int getProbability(const int encodedProbability) const {
if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
return NOT_A_PROBABILITY;
}
return mTable[encodedProbability];
int getProbability(const int level, const int elapsedTimeStepCount) const {
return mTable[level][elapsedTimeStepCount];
}
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
std::vector<int> mTable;
std::vector<std::vector<int> > mTable;
};
static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_ENCODED_PROBABILITY;
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL;
@ -95,11 +77,10 @@ class ForgettingCurveUtils {
static const int TIME_STEP_DURATION_IN_SECONDS;
static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
static const int HALF_LIFE_TIME_IN_SECONDS;
static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability);
static int getElapsedTimeStepCount(const int timestamp);

View file

@ -30,6 +30,10 @@ class HistoricalInfo {
HistoricalInfo(const int timestamp, const int level, const int count)
: mTimestamp(timestamp), mLevel(level), mCount(count) {}
bool isValid() const {
return mTimestamp != NOT_A_TIMESTAMP;
}
int getTimeStamp() const {
return mTimestamp;
}

View file

@ -30,6 +30,7 @@ import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@LargeTest
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
@ -38,8 +39,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
// Note that these are corresponding definitions in native code in
// latinime::Ver4PatriciaTriePolicy.
private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
private static final String SET_CURRENT_TIME_FOR_TESTING_QUERY =
"SET_CURRENT_TIME_FOR_TESTING";
private static final String GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@ -47,14 +46,28 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private static final int DUMMY_PROBABILITY = 0;
private int mCurrentTime = 0;
@Override
protected void setUp() throws Exception {
super.setUp();
mCurrentTime = 0;
}
@Override
protected void tearDown() throws Exception {
super.tearDown();
try {
final File dictFile =
createEmptyDictionaryAndGetFile("TestBinaryDictionary", FormatSpec.VERSION4);
final BinaryDictionary binaryDictionary =
new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
TEST_LOCALE, true /* isUpdatable */);
binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
}
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
@ -62,32 +75,29 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
mCurrentTime /* timestamp */);
}
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
final String word1, final int probability) {
binaryDictionary.addBigramWords(word0, word1, probability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
mCurrentTime /* timestamp */);
}
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
// Entries having low probability would be suppressed once in 3 GCs.
final int count = 3;
for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
}
// 4 days.
final int timeToElapse = (int)TimeUnit.SECONDS.convert(4, TimeUnit.DAYS);
mCurrentTime += timeToElapse;
setCurrentTime(binaryDictionary, mCurrentTime);
binaryDictionary.flushWithGC();
}
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
// Currently, probabilities are decayed when GC is run. All entries that have never been
// typed in 128 GCs would be removed.
final int count = 128;
for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
}
// 60 days.
final int timeToElapse = (int)TimeUnit.SECONDS.convert(60, TimeUnit.DAYS);
mCurrentTime += timeToElapse;
setCurrentTime(binaryDictionary, mCurrentTime);
binaryDictionary.flushWithGC();
}
private File createEmptyDictionaryAndGetFile(final String dictId,
@ -147,6 +157,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
final int startTime = getCurrentTime(binaryDictionary);
for (int i = 0; i < TEST_COUNT; i++) {
final int currentTime = random.nextInt(Integer.MAX_VALUE);
@ -233,6 +244,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
assertTrue(binaryDictionary.isValidWord("a"));
forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidWord("a"));
forcePassingLongTime(binaryDictionary);
@ -257,6 +270,9 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
assertTrue(binaryDictionary.isValidBigram("a", "b"));
forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidBigram("a", "b"));
@ -307,8 +323,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
forcePassingShortTime(binaryDictionary);
}
final int unigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests(
@ -321,6 +336,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
}
public void testAddManyBigramsToDecayingDict() {
@ -378,8 +397,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
forcePassingShortTime(binaryDictionary);
}
final int bigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests(
@ -392,5 +410,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)));
}
}