Prepare dictionary decay.

Bug: 6669677
Change-Id: I8fbae190dd44a6bdbee7e9b6d3a16208322727f7
main
Keisuke Kuroyanagi 2013-09-30 13:57:54 +09:00
parent 1cf4789ba6
commit c18510049a
16 changed files with 62 additions and 47 deletions

View File

@ -115,7 +115,7 @@ public final class BinaryDictionary extends Dictionary {
private static native long openNative(String sourceDir, long dictOffset, long dictSize, private static native long openNative(String sourceDir, long dictOffset, long dictSize,
boolean isUpdatable); boolean isUpdatable);
private static native void flushNative(long dict, String filePath); private static native void flushNative(long dict, String filePath);
private static native boolean needsToRunGCNative(long dict); private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
private static native void flushWithGCNative(long dict, String filePath); private static native void flushWithGCNative(long dict, String filePath);
private static native void closeNative(long dict); private static native void closeNative(long dict);
private static native int getProbabilityNative(long dict, int[] word); private static native int getProbabilityNative(long dict, int[] word);
@ -270,7 +270,7 @@ public final class BinaryDictionary extends Dictionary {
} }
private void runGCIfRequired() { private void runGCIfRequired() {
if (needsToRunGCNative(mNativeDict)) { if (needsToRunGC(true /* mindsBlockByGC */)) {
flushWithGC(); flushWithGC();
} }
} }
@ -326,9 +326,15 @@ public final class BinaryDictionary extends Dictionary {
reopen(); reopen();
} }
public boolean needsToRunGC() { /**
* Checks whether GC needs to run.
* @param mindsBlockByGC Whether to take into account that running GC blocks other operations.
* The blocking does not matter in some situations, such as during idle time or just before closing.
* @return whether GC needs to run.
*/
public boolean needsToRunGC(final boolean mindsBlockByGC) {
if (!isValidDictionary()) return false; if (!isValidDictionary()) return false;
return needsToRunGCNative(mNativeDict); return needsToRunGCNative(mNativeDict, mindsBlockByGC);
} }
@UsedForTesting @UsedForTesting

View File

@ -505,7 +505,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap()); DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
} else { } else {
if (mBinaryDictionary.needsToRunGC()) { if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
mBinaryDictionary.flushWithGC(); mBinaryDictionary.flushWithGC();
} else { } else {
mBinaryDictionary.flush(); mBinaryDictionary.flush();

View File

@ -113,10 +113,10 @@ static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dic
} }
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz, static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
jlong dict) { jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false; if (!dictionary) return false;
return dictionary->needsToRunGC(); return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
} }
static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
@ -364,7 +364,7 @@ static const JNINativeMethod sMethods[] = {
}, },
{ {
const_cast<char *>("needsToRunGCNative"), const_cast<char *>("needsToRunGCNative"),
const_cast<char *>("(J)Z"), const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC) reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
}, },
{ {

View File

@ -123,8 +123,8 @@ void Dictionary::flushWithGC(const char *const filePath) {
mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
} }
bool Dictionary::needsToRunGC() { bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
return mDictionaryStructureWithBufferPolicy->needsToRunGC(); return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
} }
void Dictionary::getProperty(const char *const query, char *const outResult, void Dictionary::getProperty(const char *const query, char *const outResult,

View File

@ -81,7 +81,7 @@ class Dictionary {
void flushWithGC(const char *const filePath); void flushWithGC(const char *const filePath);
bool needsToRunGC(); bool needsToRunGC(const bool mindsBlockByGC);
void getProperty(const char *const query, char *const outResult, void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const; const int maxResultLength) const;

View File

@ -78,7 +78,7 @@ class DictionaryStructureWithBufferPolicy {
virtual void flushWithGC(const char *const filePath) = 0; virtual void flushWithGC(const char *const filePath) = 0;
virtual bool needsToRunGC() const = 0; virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
virtual void getProperty(const char *const query, char *const outResult, virtual void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const = 0; const int maxResultLength) const = 0;

View File

@ -41,7 +41,7 @@ bool DynamicPatriciaTrieGcEventListeners
return false; return false;
} }
} else { } else {
valueStack.back() += 1; mValueStack.back() += 1;
if (node->isTerminal()) { if (node->isTerminal()) {
mValidUnigramCount += 1; mValidUnigramCount += 1;
} }
@ -49,6 +49,23 @@ bool DynamicPatriciaTrieGcEventListeners
return true; return true;
} }
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (!node->isDeleted()) {
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
}
}
return true;
}
// Writes dummy PtNode array size when the head of PtNode array is read. // Writes dummy PtNode array size when the head of PtNode array is read.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) { ::onDescend(const int ptNodeArrayPos) {

View File

@ -40,22 +40,22 @@ class DynamicPatriciaTrieGcEventListeners {
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper, DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer) BufferWithExtendableBuffer *const buffer)
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), : mWritingHelper(writingHelper), mBuffer(buffer), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {} mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
bool onAscend() { bool onAscend() {
if (valueStack.empty()) { if (mValueStack.empty()) {
return false; return false;
} }
mChildrenValue = valueStack.back(); mChildrenValue = mValueStack.back();
valueStack.pop_back(); mValueStack.pop_back();
return true; return true;
} }
bool onDescend(const int ptNodeArrayPos) { bool onDescend(const int ptNodeArrayPos) {
valueStack.push_back(0); mValueStack.push_back(0);
return true; return true;
} }
@ -74,7 +74,7 @@ class DynamicPatriciaTrieGcEventListeners {
DynamicPatriciaTrieWritingHelper *const mWritingHelper; DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
std::vector<int> valueStack; std::vector<int> mValueStack;
int mChildrenValue; int mChildrenValue;
int mValidUnigramCount; int mValidUnigramCount;
}; };
@ -94,20 +94,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; } bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) { const int *const nodeCodePoints);
if (!node->isDeleted()) {
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
}
}
return true;
}
int getValidBigramEntryCount() const { int getValidBigramEntryCount() const {
return mValidBigramEntryCount; return mValidBigramEntryCount;

View File

@ -291,7 +291,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
} }
bool DynamicPatriciaTriePolicy::needsToRunGC() const { bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffer->isUpdatable()) { if (!mBuffer->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false; return false;

View File

@ -91,7 +91,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void flushWithGC(const char *const filePath); void flushWithGC(const char *const filePath);
bool needsToRunGC() const; bool needsToRunGC(const bool mindsBlockByGC) const;
void getProperty(const char *const query, char *const outResult, void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const; const int maxResultLength) const;

View File

@ -147,7 +147,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(); mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
unigramCount, bigramCount, extendedRegionSize)) { unigramCount, bigramCount, extendedRegionSize)) {
return; return;

View File

@ -20,7 +20,8 @@ namespace latinime {
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader. // Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; // TODO: Change attribute string to "IS_DECAYING_DICT".
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date"; const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";

View File

@ -36,8 +36,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
@ -54,8 +54,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap), attributeMap)), mSize(0), mAttributeMap(*attributeMap),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)), IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)), LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {} mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
@ -82,8 +82,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mMultiWordCostMultiplier; return mMultiWordCostMultiplier;
} }
AK_FORCE_INLINE bool usesForgettingCurve() const { AK_FORCE_INLINE bool isDecayingDict() const {
return mUsesForgettingCurve; return mIsDecayingDict;
} }
AK_FORCE_INLINE int getLastUpdatedTime() const { AK_FORCE_INLINE int getLastUpdatedTime() const {
@ -113,7 +113,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const USES_FORGETTING_CURVE_KEY; static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY; static const char *const LAST_UPDATED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY; static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY;
@ -126,7 +126,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mSize; const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap; HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier; const float mMultiWordCostMultiplier;
const bool mUsesForgettingCurve; const bool mIsDecayingDict;
const int mLastUpdatedTime; const int mLastUpdatedTime;
const int mUnigramCount; const int mUnigramCount;
const int mBigramCount; const int mBigramCount;

View File

@ -107,7 +107,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
} }
bool needsToRunGC() const { bool needsToRunGC(const bool mindsBlockByGC) const {
// This method should not be called for non-updatable dictionary. // This method should not be called for non-updatable dictionary.
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false; return false;

View File

@ -42,6 +42,10 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize + mUsedAdditionalBufferSize; return mOriginalBufferSize + mUsedAdditionalBufferSize;
} }
// Returns the current value of mUsedAdditionalBufferSize, i.e. how much of the additional
// buffer is in use.
// NOTE(review): presumably equal to getTailPosition() - getOriginalBufferSize(), and measured
// in bytes -- confirm against the rest of the class.
AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
return mUsedAdditionalBufferSize;
}
/** /**
* For reading. * For reading.
*/ */

View File

@ -96,7 +96,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
fclose(file); fclose(file);
return false; return false;
} }
const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize(); const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */), if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
additionalBufSize, 1, file) < 1) { additionalBufSize, 1, file) < 1) {
fclose(file); fclose(file);