am e89a179b: Merge "Separate "GC" and "GC with decay"."

* commit 'e89a179b1048eb7d9a2edba5c140d6219a85b056':
  Separate "GC" and "GC with decay".
main
Keisuke Kuroyanagi 2013-10-01 09:51:55 -07:00 committed by Android Git Automerger
commit a231c2ab72
13 changed files with 77 additions and 32 deletions

View File

@ -344,8 +344,7 @@ static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz,
static const int GET_PROPERTY_RESULT_LENGTH = 100;
char resultChars[GET_PROPERTY_RESULT_LENGTH];
resultChars[0] = '\0';
dictionary->getDictionaryStructurePolicy()->getProperty(queryChars, resultChars,
GET_PROPERTY_RESULT_LENGTH);
dictionary->getProperty(queryChars, resultChars, GET_PROPERTY_RESULT_LENGTH);
return env->NewStringUTF(resultChars);
}

View File

@ -129,7 +129,7 @@ bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
}
// Forwards a property query to the dictionary structure policy held by this Dictionary.
// query, outResult and maxResultLength are passed through unchanged; any result is written into
// outResult by the policy.
void Dictionary::getProperty(const char *const query, char *const outResult,
const int maxResultLength) const {
const int maxResultLength) {
return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
}

View File

@ -84,7 +84,7 @@ class Dictionary {
bool needsToRunGC(const bool mindsBlockByGC);
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;
const int maxResultLength);
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy;

View File

@ -80,8 +80,10 @@ class DictionaryStructureWithBufferPolicy {
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
// Currently, this method is used only for testing. You may want to consider creating a new
// dedicated method instead of using this one if you need this in production.
virtual void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const = 0;
const int maxResultLength) = 0;
protected:
DictionaryStructureWithBufferPolicy() {}

View File

@ -33,12 +33,16 @@
namespace latinime {
// Note that there are corresponding definitions on the Java side in BinaryDictionaryTests and
// BinaryDictionaryDecayingTests.
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60;
const int DynamicPatriciaTriePolicy::DECAY_INTERVAL_FOR_DECAYING_DICTS = 2 * 60 * 60;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@ -301,7 +305,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
return;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
}
@ -310,9 +314,15 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
const bool runGCwithDecay = needsToDecay();
DynamicBigramListPolicy bigramListPolicyForGC(&mBufferWithExtendableBuffer,
&mShortcutListPolicy, runGCwithDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
&bigramListPolicyForGC, &mShortcutListPolicy, runGCwithDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
if (runGCwithDecay) {
mNeedsToDecayForTesting = false;
}
}
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
@ -340,8 +350,7 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
// Bigram count exceeds the limit.
return true;
} else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime()
+ MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) {
} else if (mindsBlockByGC && needsToDecay()) {
// Time to update probabilities for decaying.
return true;
}
@ -350,12 +359,19 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
}
// Handles string-keyed property queries.
// - UNIGRAM_COUNT_QUERY / BIGRAM_COUNT_QUERY: writes the corresponding count into outResult as a
//   decimal string; snprintf limits the output to maxResultLength bytes.
// - SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY: sets mNeedsToDecayForTesting; outResult is not written.
// Any other query leaves outResult untouched.
void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
const int maxResultLength) const {
const int maxResultLength) {
// NOTE(review): the strncmp bound below is the output buffer length, not the query length --
// confirm this is intended.
if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mBigramCount);
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
// Test hook: request that decay be treated as due.
mNeedsToDecayForTesting = true;
}
}
// Decides whether decay is due right now.
// Non-decaying dictionaries never decay. For a decaying dictionary, decay is due when it has been
// requested for testing, or when the header's last decayed time plus
// DECAY_INTERVAL_FOR_DECAYING_DICTS lies before the current time.
bool DynamicPatriciaTriePolicy::needsToDecay() const {
    if (!mHeaderPolicy.isDecayingDict()) {
        return false;
    }
    if (mNeedsToDecayForTesting) {
        // Decay was requested explicitly for testing; skip the time check.
        return true;
    }
    const bool decayIntervalElapsed =
            mHeaderPolicy.getLastDecayedTime() + DECAY_INTERVAL_FOR_DECAYING_DICTS < time(0);
    return decayIntervalElapsed;
}
} // namespace latinime

View File

@ -40,7 +40,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()) {}
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@ -95,16 +95,17 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool needsToRunGC(const bool mindsBlockByGC) const;
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;
const int maxResultLength);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const char*const UNIGRAM_COUNT_QUERY;
static const char*const BIGRAM_COUNT_QUERY;
static const char *const UNIGRAM_COUNT_QUERY;
static const char *const BIGRAM_COUNT_QUERY;
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING;
static const int DECAY_INTERVAL_FOR_DECAYING_DICTS;
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
@ -113,6 +114,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DynamicBigramListPolicy mBigramListPolicy;
int mUnigramCount;
int mBigramCount;
int mNeedsToDecayForTesting;
bool needsToDecay() const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -153,7 +153,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
unigramCount, bigramCount, extendedRegionSize)) {
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
@ -170,7 +170,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
}
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
@ -488,12 +488,12 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
this, mBuffer, mIsDecayingDict);
this, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
@ -506,7 +506,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false;
}
if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
@ -525,7 +525,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
mIsDecayingDict);
mNeedsToDecay);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy);
@ -544,7 +544,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) {
if (mIsDecayingDict) {
if (mNeedsToDecay) {
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
} else {
return newProbability;

View File

@ -51,9 +51,9 @@ class DynamicPatriciaTrieWritingHelper {
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict)
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mIsDecayingDict(isDecayingDict) {}
mNeedsToDecay(needsToDecay) {}
~DynamicPatriciaTrieWritingHelper() {}
@ -94,7 +94,7 @@ class DynamicPatriciaTrieWritingHelper {
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mIsDecayingDict;
const bool mNeedsToDecay;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos, const int bigramLinkedNodePos);

View File

@ -23,6 +23,7 @@ const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WOR
// TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
@ -63,8 +64,8 @@ float HeaderPolicy::readMultipleWordCostMultiplier() const {
}
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount,
const int extendedRegionSize) const {
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
const int unigramCount, const int bigramCount, const int extendedRegionSize) const {
int writingPos = 0;
if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
&writingPos)) {
@ -90,6 +91,11 @@ bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferT
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
time(0));
}
if (updatesLastDecayedTime) {
// Set current time as the last decayed time.
HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_DECAYED_TIME_KEY,
time(0));
}
if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
&writingPos)) {
return false;

View File

@ -40,6 +40,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_DECAYED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
@ -58,6 +60,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
~HeaderPolicy() {}
@ -90,6 +94,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mLastUpdatedTime;
}
// Returns the last decayed time held by this header policy; the value is fixed at construction.
AK_FORCE_INLINE int getLastDecayedTime() const {
return mLastDecayedTime;
}
// Returns the unigram count held by this header policy; the value is fixed at construction.
AK_FORCE_INLINE int getUnigramCount() const {
return mUnigramCount;
}
@ -106,8 +114,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
int *outValue, int outValueSize) const;
bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
const bool updatesLastUpdatedTime, const int unigramCount,
const int bigramCount, const int extendedRegionSize) const;
const bool updatesLastUpdatedTime, const bool updatesLastDecayedTime,
const int unigramCount, const int bigramCount, const int extendedRegionSize) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
@ -115,6 +123,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
static const char *const LAST_DECAYED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
static const char *const EXTENDED_REGION_SIZE_KEY;
@ -128,6 +137,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const float mMultiWordCostMultiplier;
const bool mIsDecayingDict;
const int mLastUpdatedTime;
const int mLastDecayedTime;
const int mUnigramCount;
const int mBigramCount;
const int mExtendedRegionSize;

View File

@ -114,7 +114,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
}
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const {
const int maxResultLength) {
// getProperty is not supported for this class.
if (maxResultLength > 0) {
outResult[0] = '\0';

View File

@ -44,7 +44,8 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
0 /* extendedRegionSize */);
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;

View File

@ -32,6 +32,11 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
private static final String TEST_LOCALE = "test";
// Note that there are corresponding definitions in native code in
// latinime::DynamicPatriciaTriePolicy.
private static final String SET_NEEDS_TO_DECAY_FOR_TESTING_KEY =
"SET_NEEDS_TO_DECAY_FOR_TESTING";
private static final int DUMMY_PROBABILITY = 0;
@Override
@ -45,6 +50,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
}
// Issues the SET_NEEDS_TO_DECAY_FOR_TESTING property query on the given dictionary and then
// flushes it with GC. The query's return value is intentionally ignored.
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
}
@ -53,6 +59,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
// typed in 32 GCs are removed.
final int count = 32;
for (int i = 0; i < count; i++) {
binaryDictionary.getPropertyForTests(SET_NEEDS_TO_DECAY_FOR_TESTING_KEY);
binaryDictionary.flushWithGC();
}
}