am 2fdb62a6: Start using historical info for decaying.

* commit '2fdb62a6a28d769435fe5d48d8b997c3e9a083d5':
  Start using historical info for decaying.
main
Keisuke Kuroyanagi 2013-12-10 01:48:28 -08:00 committed by Android Git Automerger
commit 35dff6f9f2
20 changed files with 189 additions and 235 deletions

View File

@ -163,7 +163,8 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) { if (bigramsIt.getBigramPos() == nextWordPos
&& bigramsIt.getProbability() != NOT_A_PROBABILITY) {
return mDictionaryStructurePolicy->getProbability( return mDictionaryStructurePolicy->getProbability(
mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos), mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
bigramsIt.getProbability()); bigramsIt.getProbability());

View File

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -34,8 +35,13 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
bigramEntry.getTargetTerminalId()); bigramEntry.getTargetTerminalId());
} }
if (outProbability) { if (outProbability) {
if (bigramEntry.hasHistoricalInfo()) {
*outProbability =
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo());
} else {
*outProbability = bigramEntry.getProbability(); *outProbability = bigramEntry.getProbability();
} }
}
if (outHasNext) { if (outHasNext) {
*outHasNext = bigramEntry.hasNext(); *outHasNext = bigramEntry.hasNext();
} }
@ -152,17 +158,12 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
} else if (mNeedsToDecayWhenUpdating) { } else if (bigramEntry.hasHistoricalInfo()) {
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave( const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getProbability(), mHeaderPolicy);
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo()); bigramEntry.getHistoricalInfo());
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo). if (ForgettingCurveUtils::needsToKeep(&historicalInfo)) {
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
const BigramEntry updatedBigramEntry = const BigramEntry updatedBigramEntry =
bigramEntry.updateProbabilityAndGetEntry(probability) bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
@ -225,14 +226,12 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const int newProbability, const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const { const int timestamp) const {
if (mNeedsToDecayWhenUpdating) { // TODO: Consolidate historical info and probability.
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability( if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
originalBigramEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp); originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
return originalBigramEntry->updateProbabilityAndGetEntry(probability) return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else { } else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
} }

View File

@ -24,18 +24,17 @@
namespace latinime { namespace latinime {
class BigramDictContent; class BigramDictContent;
class DictionaryHeaderStructurePolicy; class HeaderPolicy;
class TerminalPositionLookupTable; class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public: public:
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable, const TerminalPositionLookupTable *const terminalPositionLookupTable,
const DictionaryHeaderStructurePolicy *const headerPolicy, const HeaderPolicy *const headerPolicy)
const bool needsToDecayWhenUpdating)
: mBigramDictContent(bigramDictContent), : mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable), mTerminalPositionLookupTable(terminalPositionLookupTable),
mHeaderPolicy(headerPolicy), mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {} mHeaderPolicy(headerPolicy) {}
void getNextBigram(int *const outBigramPos, int *const outProbability, void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const; bool *const outHasNext, int *const bigramEntryPos) const;
@ -64,8 +63,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
BigramDictContent *const mBigramDictContent; BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable; const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
const DictionaryHeaderStructurePolicy *const mHeaderPolicy; const HeaderPolicy *const mHeaderPolicy;
const bool mNeedsToDecayWhenUpdating;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */ #endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */

View File

@ -93,6 +93,18 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
} }
} }
AK_FORCE_INLINE bool isValid() const {
    // A decaying dictionary must have historical information; a dictionary
    // that is not decaying is accepted regardless of that flag.
    return !mIsDecayingDict || mHasHistoricalInfoOfWords;
}
AK_FORCE_INLINE int getSize() const { AK_FORCE_INLINE int getSize() const {
return mSize; return mSize;
} }
@ -137,7 +149,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mExtendedRegionSize; return mExtendedRegionSize;
} }
AK_FORCE_INLINE bool hasHistricalInfoOfWords() const { AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
return mHasHistoricalInfoOfWords; return mHasHistoricalInfoOfWords;
} }

View File

@ -20,7 +20,6 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -30,8 +29,7 @@ bool DynamicPatriciaTrieGcEventListeners
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children. // children.
bool isUselessPtNode = !ptNodeParams->isTerminal(); bool isUselessPtNode = !ptNodeParams->isTerminal();
// TODO: Quit checking mNeedsToDecayWhenUpdating. if (ptNodeParams->isTerminal()) {
if (ptNodeParams->isTerminal() && mNeedsToDecayWhenUpdating) {
bool needsToKeepPtNode = true; bool needsToKeepPtNode = true;
if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams, if (!mPtNodeWriter->updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(ptNodeParams,
&needsToKeepPtNode)) { &needsToKeepPtNode)) {

View File

@ -27,8 +27,6 @@
namespace latinime { namespace latinime {
class DictionaryHeaderStructurePolicy;
class PtNodeWriter;
class PtNodeParams; class PtNodeParams;
// TODO: Move to pt_common. // TODO: Move to pt_common.
@ -41,12 +39,9 @@ class DynamicPatriciaTrieGcEventListeners {
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public: public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
const DictionaryHeaderStructurePolicy *const headerPolicy, PtNodeWriter *const ptNodeWriter)
PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer, : mPtNodeWriter(ptNodeWriter), mValueStack(), mChildrenValue(0),
const bool needsToDecayWhenUpdating) mValidUnigramCount(0) {}
: mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
@ -77,10 +72,7 @@ class DynamicPatriciaTrieGcEventListeners {
DISALLOW_IMPLICIT_CONSTRUCTORS( DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
PtNodeWriter *const mPtNodeWriter; PtNodeWriter *const mPtNodeWriter;
BufferWithExtendableBuffer *const mBuffer;
const bool mNeedsToDecayWhenUpdating;
std::vector<int> mValueStack; std::vector<int> mValueStack;
int mChildrenValue; int mChildrenValue;
int mValidUnigramCount; int mValidUnigramCount;

View File

@ -73,6 +73,10 @@ class BigramEntry {
return mProbability; return mProbability;
} }
// Returns whether this bigram entry's historical info is valid, as reported by
// mHistoricalInfo.isValid().
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
const HistoricalInfo *getHistoricalInfo() const { const HistoricalInfo *getHistoricalInfo() const {
return &mHistoricalInfo; return &mHistoricalInfo;
} }

View File

@ -51,6 +51,10 @@ class ProbabilityEntry {
return ProbabilityEntry(mFlags, mProbability, historicalInfo); return ProbabilityEntry(mFlags, mProbability, historicalInfo);
} }
// Returns whether this probability entry's historical info is valid, as
// reported by mHistoricalInfo.isValid().
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
int getFlags() const { int getFlags() const {
return mFlags; return mFlags;
} }

View File

@ -45,9 +45,9 @@ class Ver4DictBuffers {
} }
AK_FORCE_INLINE bool isValid() const { AK_FORCE_INLINE bool isValid() const {
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid() return mDictBuffer.get() != 0 && mHeaderPolicy.isValid()
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid() && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
&& mShortcutDictContent.isValid(); && mBigramDictContent.isValid() && mShortcutDictContent.isValid();
} }
AK_FORCE_INLINE bool isNearSizeLimit() const { AK_FORCE_INLINE bool isNearSizeLimit() const {
@ -131,9 +131,10 @@ class Ver4DictBuffers {
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
// TODO: Quit using header size. // TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()), mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderPolicy.getSize()),
mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(), mProbabilityDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable),
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistoricalInfoOfWords(),
isUpdatable), isUpdatable),
mBigramDictContent(dictDirPath, mHeaderPolicy.hasHistricalInfoOfWords(), isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {} mIsUpdatable(isUpdatable) {}
@ -142,8 +143,8 @@ class Ver4DictBuffers {
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(), mTerminalPositionLookupTable(),
mProbabilityDictContent(headerPolicy->hasHistricalInfoOfWords()), mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistricalInfoOfWords()), mShortcutDictContent(), mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {} mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer; const MmappedBuffer::MmappedBufferPtr mDictBuffer;

View File

@ -22,6 +22,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -62,8 +63,13 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
mProbabilityDictContent->getProbabilityEntry(terminalId); mProbabilityDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo());
} else {
probability = probabilityEntry.getProbability(); probability = probabilityEntry.getProbability();
} }
}
int childrenPosFieldPos = pos; int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
childrenPosFieldPos += mBuffer->getOriginalBufferSize(); childrenPosFieldPos += mBuffer->getOriginalBufferSize();

View File

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
@ -147,27 +148,21 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeA
AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode."); AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
return false; return false;
} }
if (mBuffers->getHeaderPolicy()->isDecayingDict()) {
const ProbabilityEntry originalProbabilityEntry = const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
// TODO: Remove. if (originalProbabilityEntry.hasHistoricalInfo()) {
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getProbability(), mBuffers->getHeaderPolicy());
const HistoricalInfo historicalInfo =
ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo()); originalProbabilityEntry.getHistoricalInfo());
const ProbabilityEntry probabilityEntry = const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedProbability(newProbability) originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d", AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
return false; return false;
} }
// TODO: Use ForgettingCurveUtils::needsToKeep(&historicalInfo). const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo);
const bool isValid = ForgettingCurveUtils::isValidEncodedProbability(newProbability);
if (!isValid) { if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
@ -380,14 +375,13 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const { const int timestamp) const {
if (mNeedsToDecayWhenUpdating) { // TODO: Consolidate historical info and probability.
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability( if (mBuffers->getHeaderPolicy()->hasHistoricalInfoOfWords()) {
originalProbabilityEntry->getProbability(), newProbability);
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp); originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability) return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo); &updatedHistoricalInfo);
} else { } else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
} }

View File

@ -40,12 +40,10 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public: public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy, Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
const bool needsToDecayWhenUpdating)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader), mReadingHelper(mTrieBuffer, mPtNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
virtual ~Ver4PatriciaTrieNodeWriter() {} virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -120,7 +118,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper; DynamicPatriciaTrieReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy; Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */ #endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View File

@ -34,8 +34,6 @@ const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT = const char *const Ver4PatriciaTriePolicy::SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT =
"SET_CURRENT_TIME_FOR_TESTING:%d"; "SET_CURRENT_TIME_FOR_TESTING:%d";
const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME"; const char *const Ver4PatriciaTriePolicy::GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@ -62,8 +60,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode. // valid terminal DicNode.
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY) isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
!= NOT_A_PROBABILITY;
} }
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
@ -262,11 +259,7 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return; return;
} }
const bool needsToDecay = mHeaderPolicy->isDecayingDict() mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, mHeaderPolicy));
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, needsToDecay);
mNeedsToDecayForTesting = false;
} }
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@ -286,8 +279,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size. // Needs to reduce dictionary size.
return true; return true;
} else if (mHeaderPolicy->isDecayingDict()) { } else if (mHeaderPolicy->isDecayingDict()) {
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
mindsBlockByGC, mUnigramCount, mBigramCount, mHeaderPolicy); mHeaderPolicy);
} }
return false; return false;
} }
@ -308,8 +301,6 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
snprintf(outResult, maxResultLength, "%d", snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT : mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
mNeedsToDecayForTesting = true;
} else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) { } else if (sscanf(query, SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT, &timestamp) == 1) {
TimeKeeper::startTestModeWithForceCurrentTime(timestamp); TimeKeeper::startTestModeWithForceCurrentTime(timestamp);
} else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) { } else if (strncmp(query, GET_CURRENT_TIME_QUERY, compareLength) == 0) {

View File

@ -41,17 +41,16 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
: mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()), : mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()), mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(), mBigramPolicy(mBuffers.get()->getMutableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy, mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy),
mHeaderPolicy->isDecayingDict()),
mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(), mShortcutPolicy(mBuffers.get()->getMutableShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy, mHeaderPolicy->isDecayingDict()), &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()), mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()), mUnigramCount(mHeaderPolicy->getUnigramCount()),
mBigramCount(mHeaderPolicy->getBigramCount()), mNeedsToDecayForTesting(false) {}; mBigramCount(mHeaderPolicy->getBigramCount()) {};
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
@ -117,7 +116,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
static const char *const BIGRAM_COUNT_QUERY; static const char *const BIGRAM_COUNT_QUERY;
static const char *const MAX_UNIGRAM_COUNT_QUERY; static const char *const MAX_UNIGRAM_COUNT_QUERY;
static const char *const MAX_BIGRAM_COUNT_QUERY; static const char *const MAX_BIGRAM_COUNT_QUERY;
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT; static const char *const SET_CURRENT_TIME_FOR_TESTING_QUERY_FORMAT;
static const char *const GET_CURRENT_TIME_QUERY; static const char *const GET_CURRENT_TIME_QUERY;
static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY; static const char *const QUIT_TIMEKEEPER_TEST_MODE_QUERY;
@ -137,7 +135,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieWritingHelper mWritingHelper; Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount; int mUnigramCount;
int mBigramCount; int mBigramCount;
bool mNeedsToDecayForTesting;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H

View File

@ -53,20 +53,20 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePa
} }
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const trieFilePath, const bool needsToDecay) { const char *const trieFilePath) {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(headerPolicy)); Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
int unigramCount = 0; int unigramCount = 0;
int bigramCount = 0; int bigramCount = 0;
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount, if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
needsToDecay)) {
return; return;
} }
BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
needsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { true /* updatesLastDecayedTime */, unigramCount, bigramCount,
0 /* extendedRegionSize */)) {
return; return;
} }
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */; const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
@ -77,29 +77,28 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount, const bool needsToDecay) { int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent()); mBuffers->getProbabilityDictContent());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy, needsToDecay); mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()); mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy, mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(), &ptNodeWriter);
needsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false; return false;
} }
if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted if (headerPolicy->isDecayingDict()
&& traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams. // TODO: Remove more unigrams.
} }
@ -111,7 +110,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&traversePolicyToUpdateBigramProbability)) { &traversePolicyToUpdateBigramProbability)) {
return false; return false;
} }
if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() if (headerPolicy->isDecayingDict()
&& traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams. // TODO: Remove more bigrams.
} }
@ -120,8 +120,7 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy, buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@ -134,13 +133,11 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent()); buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy, buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
false /* needsToDecay */);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable()); buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy, buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
false /* needsToDecayWhenUpdating */);
// Re-assign terminal IDs for valid terminal PtNodes. // Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap; TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(

View File

@ -36,7 +36,7 @@ class Ver4PatriciaTrieWritingHelper {
const int bigramCount) const; const int bigramCount) const;
void writeToDictFileWithGC(const int rootPtNodeArrayPos, void writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const trieFilePath, const bool needsToDecay); const char *const trieFilePath);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
@ -66,7 +66,7 @@ class Ver4PatriciaTrieWritingHelper {
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount, const bool needsToDecay); int *const outBigramCount);
Ver4DictBuffers *const mBuffers; Ver4DictBuffers *const mBuffers;
}; };

View File

@ -31,12 +31,6 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66 hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3; const int ForgettingCurveUtils::MAX_LEVEL = 3;
@ -53,9 +47,9 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
const int newProbability, const int timestamp) { const int newProbability, const int timestamp) {
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) { if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */); return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
} else if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) { } else if (!originalHistoricalInfo->isValid()) {
// Initial information. // Initial information.
return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */); return HistoricalInfo(timestamp, 0 /* level */, 1 /* count */);
} else { } else {
const int updatedCount = originalHistoricalInfo->getCount() + 1; const int updatedCount = originalHistoricalInfo->getCount() + 1;
if (updatedCount > MAX_COUNT) { if (updatedCount > MAX_COUNT) {
@ -75,85 +69,46 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
} }
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, /* static */ int ForgettingCurveUtils::decodeProbability(
const int encodedBigramProbability) { const HistoricalInfo *const historicalInfo) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) { const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp());
return sProbabilityTable.getProbability(historicalInfo->getLevel(),
min(max(elapsedTimeStepCount, 0), MAX_ELAPSED_TIME_STEP_COUNT));
}
/* static */ int ForgettingCurveUtils::getProbability(const int unigramProbability,
const int bigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} else if (encodedBigramProbability == NOT_A_PROBABILITY) { } else if (bigramProbability == NOT_A_PROBABILITY) {
return backoff(decodeProbability(encodedUnigramProbability)); return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY);
} else { } else {
const int unigramProbability = decodeProbability(encodedUnigramProbability);
const int bigramProbability = decodeProbability(encodedBigramProbability);
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
} }
} }
// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
// (i.e. unigram probability + bigram probability delta).
//
// Computes the next encoded probability from the current one and from newProbability:
//  - no current value and no newProbability: 0.
//  - newProbability is given and the current value is absent or below
//    MIN_VALID_ENCODED_PROBABILITY: MIN_VALID_ENCODED_PROBABILITY.
//  - otherwise: the current value plus ENCODED_PROBABILITY_STEP, capped at
//    MAX_ENCODED_PROBABILITY.
/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
        const int originalEncodedProbability, const int newProbability) {
    const bool hasNewProbability = (newProbability != NOT_A_PROBABILITY);
    if (originalEncodedProbability == NOT_A_PROBABILITY) {
        // The bigram relation is not in this dictionary. When newProbability is also missing,
        // the bigram target is not in other dictionaries either.
        return hasNewProbability ? MIN_VALID_ENCODED_PROBABILITY : 0;
    }
    if (hasNewProbability && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
        return MIN_VALID_ENCODED_PROBABILITY;
    }
    const int steppedProbability = originalEncodedProbability + ENCODED_PROBABILITY_STEP;
    return min(steppedProbability, MAX_ENCODED_PROBABILITY);
}
// Returns non-zero when encodedProbability is at least MIN_VALID_ENCODED_PROBABILITY.
// NOTE(review): the return type is int although the value is a boolean comparison result.
/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
    const bool reachesMinimum = !(encodedProbability < MIN_VALID_ENCODED_PROBABILITY);
    return reachesMinimum;
}
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) { /* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo) {
return historicalInfo->getLevel() > 0 return historicalInfo->getLevel() > 0
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp()) || getElapsedTimeStepCount(historicalInfo->getTimeStamp())
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
} }
// Applies randomized decay to an encoded probability before it is saved.
// The number of decay rounds is the time since headerPolicy->getLastDecayedTime() divided by
// DECAY_INTERVAL_SECONDS, but at least 1. The input is first clamped to
// [0, MAX_ENCODED_PROBABILITY]. In each round one rand() value is drawn, and the probability is
// lowered by ENCODED_PROBABILITY_STEP (never below 0) when that value, scaled to [0, 1], is
// greater than (1 - MIN_PROBABILITY_TO_DECAY) * current / MAX_ENCODED_PROBABILITY.
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability,
        const DictionaryHeaderStructurePolicy *const headerPolicy) {
    const int secondsSinceLastDecay =
            TimeKeeper::peekCurrentTime() - headerPolicy->getLastDecayedTime();
    int remainingRounds = max(secondsSinceLastDecay / DECAY_INTERVAL_SECONDS, 1);
    int decayedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
    // TODO: Implement the decay in more proper way.
    while (remainingRounds > 0) {
        --remainingRounds;
        const float currentRate = static_cast<float>(decayedProbability)
                / static_cast<float>(MAX_ENCODED_PROBABILITY);
        const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
        const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
        if (randValue > thresholdToDecay) {
            decayedProbability = max(decayedProbability - ENCODED_PROBABILITY_STEP, 0);
        }
    }
    return decayedProbability;
}
// Builds the historical info to write out for an entry.
//  - Timestamp is NOT_A_TIMESTAMP: returns a default-constructed HistoricalInfo.
//  - At most MAX_ELAPSED_TIME_STEP_COUNT time steps have elapsed: returns a copy of the input.
//  - Otherwise the entry is leveled down by one level per (MAX_ELAPSED_TIME_STEP_COUNT + 1)
//    elapsed steps, but never below level 0. The timestamp is moved forward by the time that
//    the applied level-downs account for, and the count is reset to 0.
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
        const HistoricalInfo *const originalHistoricalInfo) {
    const int originalTimestamp = originalHistoricalInfo->getTimeStamp();
    if (originalTimestamp == NOT_A_TIMESTAMP) {
        return HistoricalInfo();
    }
    const int elapsedTimeStep = getElapsedTimeStepCount(originalTimestamp);
    if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
        // No need to update historical info.
        return *originalHistoricalInfo;
    }
    // Level down.
    const int stepsPerLevel = MAX_ELAPSED_TIME_STEP_COUNT + 1;
    const int originalLevel = originalHistoricalInfo->getLevel();
    const int levelDownAmount = min(elapsedTimeStep / stepsPerLevel, originalLevel);
    const int adjustedTimestamp = originalTimestamp
            + levelDownAmount * stepsPerLevel * TIME_STEP_DURATION_IN_SECONDS;
    return HistoricalInfo(adjustedTimestamp, originalLevel - levelDownAmount, 0 /* count */);
}
@ -179,14 +134,6 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
return false; return false;
} }
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return NOT_A_PROBABILITY;
} else {
return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
}
}
// See comments in ProbabilityUtils::backoff(). // See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { /* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
if (unigramProbability == NOT_A_PROBABILITY) { if (unigramProbability == NOT_A_PROBABILITY) {
@ -201,14 +148,24 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
} }
// Fills the table with one row per level (0..MAX_LEVEL) and one column per elapsed time step
// count (0..MAX_ELAPSED_TIME_STEP_COUNT).
//  - Level 0: every entry is NOT_A_PROBABILITY.
//  - Other levels: the value at step 0 is MAX_COMPUTED_PROBABILITY / 2^(MAX_LEVEL - level),
//    computed with integer division, and it is scaled by 2^(-step / number of columns) for the
//    following steps. Each entry is truncated to int and clamped to
//    [1, MAX_COMPUTED_PROBABILITY].
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
    const int columnCount = MAX_ELAPSED_TIME_STEP_COUNT + 1;
    mTable.resize(MAX_LEVEL + 1);
    mTable[0].assign(columnCount, NOT_A_PROBABILITY);
    for (int level = 1; level <= MAX_LEVEL; ++level) {
        std::vector<int> &row = mTable[level];
        row.resize(columnCount);
        const float initialProbability =
                static_cast<float>(MAX_COMPUTED_PROBABILITY / (1 << (MAX_LEVEL - level)));
        for (int step = 0; step < columnCount; ++step) {
            const int elapsedTime = step * TIME_STEP_DURATION_IN_SECONDS;
            const float exponent = -1.0f * static_cast<float>(elapsedTime)
                    / static_cast<float>(TIME_STEP_DURATION_IN_SECONDS * columnCount);
            const float probability = initialProbability * powf(2.0f, exponent);
            row[step] = min(max(static_cast<int>(probability), 1), MAX_COMPUTED_PROBABILITY);
        }
    }
}

View File

@ -26,8 +26,6 @@ namespace latinime {
class DictionaryHeaderStructurePolicy; class DictionaryHeaderStructurePolicy;
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
// required to introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta. // TODO: Quit using bigram probability to indicate the delta.
class ForgettingCurveUtils { class ForgettingCurveUtils {
public: public:
@ -43,22 +41,13 @@ class ForgettingCurveUtils {
static const HistoricalInfo createHistoricalInfoToSave( static const HistoricalInfo createHistoricalInfoToSave(
const HistoricalInfo *const originalHistoricalInfo); const HistoricalInfo *const originalHistoricalInfo);
static int decodeProbability(const HistoricalInfo *const historicalInfo);
static int getProbability(const int encodedUnigramProbability, static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability); const int encodedBigramProbability);
// TODO: Remove.
static int getUpdatedEncodedProbability(const int originalEncodedProbability,
const int newProbability);
// TODO: Remove.
static int isValidEncodedProbability(const int encodedProbability);
static bool needsToKeep(const HistoricalInfo *const historicalInfo); static bool needsToKeep(const HistoricalInfo *const historicalInfo);
// TODO: Remove.
static int getEncodedProbabilityToSave(const int encodedProbability,
const DictionaryHeaderStructurePolicy *const headerPolicy);
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount, static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy); const int bigramCount, const DictionaryHeaderStructurePolicy *const headerPolicy);
@ -69,24 +58,17 @@ class ForgettingCurveUtils {
public: public:
ProbabilityTable(); ProbabilityTable();
int getProbability(const int encodedProbability) const { int getProbability(const int level, const int elapsedTimeStepCount) const {
if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) { return mTable[level][elapsedTimeStepCount];
return NOT_A_PROBABILITY;
}
return mTable[encodedProbability];
} }
private: private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable); DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
std::vector<int> mTable; std::vector<std::vector<int> > mTable;
}; };
static const int MAX_COMPUTED_PROBABILITY; static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_ENCODED_PROBABILITY;
static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS; static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL; static const int MAX_LEVEL;
@ -95,11 +77,10 @@ class ForgettingCurveUtils {
static const int TIME_STEP_DURATION_IN_SECONDS; static const int TIME_STEP_DURATION_IN_SECONDS;
static const int MAX_ELAPSED_TIME_STEP_COUNT; static const int MAX_ELAPSED_TIME_STEP_COUNT;
static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD; static const int DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
static const int HALF_LIFE_TIME_IN_SECONDS;
static const ProbabilityTable sProbabilityTable; static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability); static int backoff(const int unigramProbability);
static int getElapsedTimeStepCount(const int timestamp); static int getElapsedTimeStepCount(const int timestamp);

View File

@ -30,6 +30,10 @@ class HistoricalInfo {
HistoricalInfo(const int timestamp, const int level, const int count) HistoricalInfo(const int timestamp, const int level, const int count)
: mTimestamp(timestamp), mLevel(level), mCount(count) {} : mTimestamp(timestamp), mLevel(level), mCount(count) {}
bool isValid() const {
return mTimestamp != NOT_A_TIMESTAMP;
}
int getTimeStamp() const { int getTimeStamp() const {
return mTimestamp; return mTimestamp;
} }

View File

@ -30,6 +30,7 @@ import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.concurrent.TimeUnit;
@LargeTest @LargeTest
public class BinaryDictionaryDecayingTests extends AndroidTestCase { public class BinaryDictionaryDecayingTests extends AndroidTestCase {
@ -38,8 +39,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
// Note that these are corresponding definitions in native code in // Note that these are corresponding definitions in native code in
// latinime::Ver4PatriciaTriePolicy. // latinime::Ver4PatriciaTriePolicy.
private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
private static final String SET_CURRENT_TIME_FOR_TESTING_QUERY = private static final String SET_CURRENT_TIME_FOR_TESTING_QUERY =
"SET_CURRENT_TIME_FOR_TESTING"; "SET_CURRENT_TIME_FOR_TESTING";
private static final String GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME"; private static final String GET_CURRENT_TIME_QUERY = "GET_CURRENT_TIME";
@ -47,14 +46,28 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private static final int DUMMY_PROBABILITY = 0; private static final int DUMMY_PROBABILITY = 0;
private int mCurrentTime = 0;
@Override @Override
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
mCurrentTime = 0;
} }
@Override @Override
protected void tearDown() throws Exception { protected void tearDown() throws Exception {
super.tearDown(); super.tearDown();
try {
final File dictFile =
createEmptyDictionaryAndGetFile("TestBinaryDictionary", FormatSpec.VERSION4);
final BinaryDictionary binaryDictionary =
new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
TEST_LOCALE, true /* isUpdatable */);
binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
} }
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
@ -62,33 +75,30 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */, binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); mCurrentTime /* timestamp */);
} }
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
final String word1, final int probability) { final String word1, final int probability) {
binaryDictionary.addBigramWords(word0, word1, probability, binaryDictionary.addBigramWords(word0, word1, probability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); mCurrentTime /* timestamp */);
} }
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) { private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
// Entries having low probability would be suppressed once in 3 GCs. // 4 days.
final int count = 3; final int timeToElapse = (int)TimeUnit.SECONDS.convert(4, TimeUnit.DAYS);
for (int i = 0; i < count; i++) { mCurrentTime += timeToElapse;
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); setCurrentTime(binaryDictionary, mCurrentTime);
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
} }
}
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) { private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
// Currently, probabilities are decayed when GC is run. All entries that have never been // 60 days.
// typed in 128 GCs would be removed. final int timeToElapse = (int)TimeUnit.SECONDS.convert(60, TimeUnit.DAYS);
final int count = 128; mCurrentTime += timeToElapse;
for (int i = 0; i < count; i++) { setCurrentTime(binaryDictionary, mCurrentTime);
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
} }
}
private File createEmptyDictionaryAndGetFile(final String dictId, private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException { final int formatVersion) throws IOException {
@ -147,6 +157,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
binaryDictionary.getPropertyForTests(QUIT_TIMEKEEPER_TEST_MODE_QUERY);
final int startTime = getCurrentTime(binaryDictionary); final int startTime = getCurrentTime(binaryDictionary);
for (int i = 0; i < TEST_COUNT; i++) { for (int i = 0; i < TEST_COUNT; i++) {
final int currentTime = random.nextInt(Integer.MAX_VALUE); final int currentTime = random.nextInt(Integer.MAX_VALUE);
@ -233,6 +244,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
assertTrue(binaryDictionary.isValidWord("a"));
forcePassingShortTime(binaryDictionary); forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidWord("a")); assertTrue(binaryDictionary.isValidWord("a"));
forcePassingLongTime(binaryDictionary); forcePassingLongTime(binaryDictionary);
@ -257,6 +270,9 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY); addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY); addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "a", DUMMY_PROBABILITY);
addUnigramWord(binaryDictionary, "b", DUMMY_PROBABILITY);
addBigramWords(binaryDictionary, "a", "b", DUMMY_PROBABILITY);
assertTrue(binaryDictionary.isValidBigram("a", "b")); assertTrue(binaryDictionary.isValidBigram("a", "b"));
forcePassingShortTime(binaryDictionary); forcePassingShortTime(binaryDictionary);
assertTrue(binaryDictionary.isValidBigram("a", "b")); assertTrue(binaryDictionary.isValidBigram("a", "b"));
@ -307,8 +323,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests( Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)); BinaryDictionary.UNIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); forcePassingShortTime(binaryDictionary);
binaryDictionary.flushWithGC();
} }
final int unigramCountAfterGC = final int unigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests( Integer.parseInt(binaryDictionary.getPropertyForTests(
@ -321,6 +336,10 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0); BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount); BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.UNIGRAM_COUNT_QUERY)));
} }
public void testAddManyBigramsToDecayingDict() { public void testAddManyBigramsToDecayingDict() {
@ -378,8 +397,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
Integer.parseInt(binaryDictionary.getPropertyForTests( Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)); BinaryDictionary.BIGRAM_COUNT_QUERY));
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY); forcePassingShortTime(binaryDictionary);
binaryDictionary.flushWithGC();
} }
final int bigramCountAfterGC = final int bigramCountAfterGC =
Integer.parseInt(binaryDictionary.getPropertyForTests( Integer.parseInt(binaryDictionary.getPropertyForTests(
@ -392,5 +410,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0); BinaryDictionary.BIGRAM_COUNT_QUERY)) > 0);
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests( assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount); BinaryDictionary.BIGRAM_COUNT_QUERY)) <= maxBigramCount);
forcePassingLongTime(binaryDictionary);
assertEquals(0, Integer.parseInt(binaryDictionary.getPropertyForTests(
BinaryDictionary.BIGRAM_COUNT_QUERY)));
} }
} }