Separate "GC" and "GC with decay".
Bug: 6669677
Change-Id: I9d6aba76cef2616f0549e612db9701e1d6a19467
main
parent
126d758c1e
commit
6995310996
|
@ -344,8 +344,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz,
|
||||||
static const int GET_PROPERTY_RESULT_LENGTH = 100;
|
static const int GET_PROPERTY_RESULT_LENGTH = 100;
|
||||||
char resultChars[GET_PROPERTY_RESULT_LENGTH];
|
char resultChars[GET_PROPERTY_RESULT_LENGTH];
|
||||||
resultChars[0] = '\0';
|
resultChars[0] = '\0';
|
||||||
dictionary->getDictionaryStructurePolicy()->getProperty(queryChars, resultChars,
|
dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH);
|
||||||
GET_PROPERTY_RESULT_LENGTH);
|
|
||||||
return env->NewStringUTF(resultChars);
|
return env->NewStringUTF(resultChars);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -129,7 +129,7 @@ bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::getProperty(const char *const query, char *const outResult,
|
void Dictionary::getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const {
|
const int maxResultLength) {
|
||||||
return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
|
return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ class Dictionary {
|
||||||
bool needsToRunGC(const bool mindsBlockByGC);
|
bool needsToRunGC(const bool mindsBlockByGC);
|
||||||
|
|
||||||
void getProperty(const char *const query, char *const outResult,
|
void getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const;
|
const int maxResultLength);
|
||||||
|
|
||||||
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
|
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
|
||||||
return mDictionaryStructureWithBufferPolicy;
|
return mDictionaryStructureWithBufferPolicy;
|
||||||
|
|
|
@ -80,8 +80,10 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
|
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
|
||||||
|
|
||||||
|
// Currently, this method is used only for testing. If you need similar functionality in
|
||||||
|
// production code, consider adding a new dedicated method instead of using this one.
|
||||||
virtual void getProperty(const char *const query, char *const outResult,
|
virtual void getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const = 0;
|
const int maxResultLength) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryStructureWithBufferPolicy() {}
|
DictionaryStructureWithBufferPolicy() {}
|
||||||
|
|
|
@ -33,12 +33,16 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
// Note that these have corresponding definitions on the Java side, in BinaryDictionaryTests and
|
||||||
|
// BinaryDictionaryDecayingTests.
|
||||||
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||||
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||||
|
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
|
||||||
|
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
||||||
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
||||||
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||||
const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60;
|
const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
|
@ -301,7 +305,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
||||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,9 +314,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const bool runGCwithDecay = needsToDecay();
|
||||||
|
DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
|
||||||
|
&mShortcutListPolicy, runGCwithDecay);
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
|
||||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||||
|
if (runGCwithDecay) {
|
||||||
|
mNeedsToDecayForTesting = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
|
@ -340,8 +350,7 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
|
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
|
||||||
// Bigram count exceeds the limit.
|
// Bigram count exceeds the limit.
|
||||||
return true;
|
return true;
|
||||||
} else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime()
|
} else if (mindsBlockByGC && needsToDecay()) {
|
||||||
+ MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) {
|
|
||||||
// Time to update probabilities for decaying.
|
// Time to update probabilities for decaying.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -350,12 +359,19 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
|
void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const {
|
const int maxResultLength) {
|
||||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
||||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
||||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||||
|
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
|
||||||
|
mNeedsToDecayForTesting = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTriePolicy::needsToDecay() const {
|
||||||
|
return mHeaderPolicy.isDecayingDict() && (mNeedsToDecayForTesting
|
||||||
|
|| mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -40,7 +40,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||||
|
|
||||||
~DynamicPatriciaTriePolicy() {
|
~DynamicPatriciaTriePolicy() {
|
||||||
delete mBuffer;
|
delete mBuffer;
|
||||||
|
@ -95,16 +95,17 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
bool needsToRunGC(const bool mindsBlockByGC) const;
|
bool needsToRunGC(const bool mindsBlockByGC) const;
|
||||||
|
|
||||||
void getProperty(const char *const query, char *const outResult,
|
void getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const;
|
const int maxResultLength);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||||
|
|
||||||
static const char*const UNIGRAM_COUNT_QUERY;
|
static const char *const UNIGRAM_COUNT_QUERY;
|
||||||
static const char*const BIGRAM_COUNT_QUERY;
|
static const char *const BIGRAM_COUNT_QUERY;
|
||||||
|
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
||||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING;
|
static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
|
@ -113,6 +114,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
|
int mNeedsToDecayForTesting;
|
||||||
|
|
||||||
|
bool needsToDecay() const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -153,7 +153,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
|
||||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
||||||
mBuffer->getUsedAdditionalBufferSize();
|
mBuffer->getUsedAdditionalBufferSize();
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||||
unigramCount, bigramCount, extendedRegionSize)) {
|
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
|
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
|
||||||
|
@ -170,7 +170,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
}
|
}
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||||
unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
||||||
|
@ -488,12 +488,12 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
this, mBuffer, mIsDecayingDict);
|
this, mBuffer, mNeedsToDecay);
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
||||||
// TODO: Remove more unigrams.
|
// TODO: Remove more unigrams.
|
||||||
}
|
}
|
||||||
|
@ -506,7 +506,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
||||||
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||||
// TODO: Remove more bigrams.
|
// TODO: Remove more bigrams.
|
||||||
}
|
}
|
||||||
|
@ -525,7 +525,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
// Create policy instance for the GCed dictionary.
|
// Create policy instance for the GCed dictionary.
|
||||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||||
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
|
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
|
||||||
mIsDecayingDict);
|
mNeedsToDecay);
|
||||||
// Create reading helper for the GCed dictionary.
|
// Create reading helper for the GCed dictionary.
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
||||||
&newDictShortcutPolicy);
|
&newDictShortcutPolicy);
|
||||||
|
@ -544,7 +544,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
|
|
||||||
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
||||||
const int newProbability) {
|
const int newProbability) {
|
||||||
if (mIsDecayingDict) {
|
if (mNeedsToDecay) {
|
||||||
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
|
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
|
||||||
} else {
|
} else {
|
||||||
return newProbability;
|
return newProbability;
|
||||||
|
|
|
@ -51,9 +51,9 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict)
|
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
||||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||||
mIsDecayingDict(isDecayingDict) {}
|
mNeedsToDecay(needsToDecay) {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieWritingHelper() {}
|
~DynamicPatriciaTrieWritingHelper() {}
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
const bool mIsDecayingDict;
|
const bool mNeedsToDecay;
|
||||||
|
|
||||||
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
||||||
const int movedPos, const int bigramLinkedNodePos);
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
|
|
@ -23,6 +23,7 @@ const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WOR
|
||||||
// TODO: Change attribute string to "IS_DECAYING_DICT".
|
// TODO: Change attribute string to "IS_DECAYING_DICT".
|
||||||
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
|
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
|
||||||
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
|
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
|
||||||
|
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
|
||||||
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
|
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
|
||||||
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
|
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
|
||||||
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
|
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
|
||||||
|
@ -63,8 +64,8 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount,
|
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
|
||||||
const int extendedRegionSize) const {
|
const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
|
||||||
int writingPos = 0;
|
int writingPos = 0;
|
||||||
if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
|
if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
@ -90,6 +91,11 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
|
||||||
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
|
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
|
||||||
time(0));
|
time(0));
|
||||||
}
|
}
|
||||||
|
if (updatesLastDecayedTime) {
|
||||||
|
// Set current time as the last decayed time.
|
||||||
|
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
|
||||||
|
time(0));
|
||||||
|
}
|
||||||
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
|
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -40,6 +40,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
||||||
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
||||||
|
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
|
LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)),
|
||||||
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
|
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
|
||||||
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
|
@ -58,6 +60,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
||||||
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
||||||
|
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||||
|
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
||||||
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
|
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
|
||||||
|
|
||||||
~HeaderPolicy() {}
|
~HeaderPolicy() {}
|
||||||
|
@ -90,6 +94,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
return mLastUpdatedTime;
|
return mLastUpdatedTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getLastDecayedTime() const {
|
||||||
|
return mLastDecayedTime;
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getUnigramCount() const {
|
AK_FORCE_INLINE int getUnigramCount() const {
|
||||||
return mUnigramCount;
|
return mUnigramCount;
|
||||||
}
|
}
|
||||||
|
@ -106,8 +114,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
int *outValue, int outValueSize) const;
|
int *outValue, int outValueSize) const;
|
||||||
|
|
||||||
bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
const bool updatesLastUpdatedTime, const int unigramCount,
|
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
|
||||||
const int bigramCount, const int extendedRegionSize) const;
|
const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
|
||||||
|
@ -115,6 +123,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
|
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
|
||||||
static const char *const IS_DECAYING_DICT_KEY;
|
static const char *const IS_DECAYING_DICT_KEY;
|
||||||
static const char *const LAST_UPDATED_TIME_KEY;
|
static const char *const LAST_UPDATED_TIME_KEY;
|
||||||
|
static const char *const LAST_DECAYED_TIME_KEY;
|
||||||
static const char *const UNIGRAM_COUNT_KEY;
|
static const char *const UNIGRAM_COUNT_KEY;
|
||||||
static const char *const BIGRAM_COUNT_KEY;
|
static const char *const BIGRAM_COUNT_KEY;
|
||||||
static const char *const EXTENDED_REGION_SIZE_KEY;
|
static const char *const EXTENDED_REGION_SIZE_KEY;
|
||||||
|
@ -128,6 +137,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
const float mMultiWordCostMultiplier;
|
const float mMultiWordCostMultiplier;
|
||||||
const bool mIsDecayingDict;
|
const bool mIsDecayingDict;
|
||||||
const int mLastUpdatedTime;
|
const int mLastUpdatedTime;
|
||||||
|
const int mLastDecayedTime;
|
||||||
const int mUnigramCount;
|
const int mUnigramCount;
|
||||||
const int mBigramCount;
|
const int mBigramCount;
|
||||||
const int mExtendedRegionSize;
|
const int mExtendedRegionSize;
|
||||||
|
|
|
@ -114,7 +114,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
void getProperty(const char *const query, char *const outResult,
|
void getProperty(const char *const query, char *const outResult,
|
||||||
const int maxResultLength) const {
|
const int maxResultLength) {
|
||||||
// getProperty is not supported for this class.
|
// getProperty is not supported for this class.
|
||||||
if (maxResultLength > 0) {
|
if (maxResultLength > 0) {
|
||||||
outResult[0] = '\0';
|
outResult[0] = '\0';
|
||||||
|
|
|
@ -44,7 +44,8 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
||||||
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||||
0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
|
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
||||||
|
0 /* extendedRegionSize */);
|
||||||
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -32,6 +32,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||||
private static final String TEST_LOCALE = "test";
|
private static final String TEST_LOCALE = "test";
|
||||||
|
|
||||||
|
// Note that these have corresponding definitions in native code, in
|
||||||
|
// latinime::DynamicPatriciaTriePolicy.
|
||||||
|
private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
|
||||||
|
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
||||||
|
|
||||||
private static final int DUMMY_PROBABILITY = 0;
|
private static final int DUMMY_PROBABILITY = 0;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -45,6 +50,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
||||||
|
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,6 +59,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
// typed in 32 GCs are removed.
|
// typed in 32 GCs are removed.
|
||||||
final int count = 32;
|
final int count = 32;
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
|
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
|
||||||
binaryDictionary.flushWithGC();
|
binaryDictionary.flushWithGC();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue