Prepare dictionary decay.

Bug: 6669677
Change-Id: I8fbae190dd44a6bdbee7e9b6d3a16208322727f7
This commit is contained in:
Keisuke Kuroyanagi 2013-09-30 13:57:54 +09:00
parent 1cf4789ba6
commit c18510049a
16 changed files with 62 additions and 47 deletions

View file

@ -115,7 +115,7 @@ public final class BinaryDictionary extends Dictionary {
private static native long openNative(String sourceDir, long dictOffset, long dictSize,
boolean isUpdatable);
private static native void flushNative(long dict, String filePath);
private static native boolean needsToRunGCNative(long dict);
private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
private static native void flushWithGCNative(long dict, String filePath);
private static native void closeNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
@ -270,7 +270,7 @@ public final class BinaryDictionary extends Dictionary {
}
/**
 * Runs a flush with GC when the dictionary reports that GC is needed.
 * The check is made with mindsBlockByGC set to true.
 */
private void runGCIfRequired() {
if (needsToRunGCNative(mNativeDict)) {
if (needsToRunGC(true /* mindsBlockByGC */)) {
flushWithGC();
}
}
@ -326,9 +326,15 @@ public final class BinaryDictionary extends Dictionary {
reopen();
}
public boolean needsToRunGC() {
/**
 * Checks whether garbage collection (GC) needs to be run on this dictionary.
 * @param mindsBlockByGC Whether to take operations blocked by GC into account. The blocking
 * does not matter in some situations, such as during idle time or just before closing.
 * @return whether GC needs to be run; always false when this dictionary is not valid.
 */
public boolean needsToRunGC(final boolean mindsBlockByGC) {
if (!isValidDictionary()) return false;
return needsToRunGCNative(mNativeDict);
return needsToRunGCNative(mNativeDict, mindsBlockByGC);
}
@UsedForTesting

View file

@ -505,7 +505,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
} else {
if (mBinaryDictionary.needsToRunGC()) {
if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
mBinaryDictionary.flushWithGC();
} else {
mBinaryDictionary.flush();

View file

@ -113,10 +113,10 @@ static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dic
}
// Native implementation registered in sMethods under the name "needsToRunGCNative".
// Interprets the jlong handle as a Dictionary pointer, returns false when that pointer is
// null, and otherwise forwards the query to Dictionary::needsToRunGC().
// NOTE(review): the function is declared to return bool while the registered JNI signature
// ends in Z (jboolean) -- consider declaring the return type as jboolean; confirm.
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
jlong dict) {
jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
return dictionary->needsToRunGC();
// Convert the jboolean argument to a C++ bool before passing it on.
return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
@ -364,7 +364,7 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("needsToRunGCNative"),
const_cast<char *>("(J)Z"),
const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{

View file

@ -123,8 +123,8 @@ void Dictionary::flushWithGC(const char *const filePath) {
mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
// Asks the dictionary structure policy whether GC needs to be run and returns its answer.
// mindsBlockByGC is passed through to the policy unchanged.
bool Dictionary::needsToRunGC() {
return mDictionaryStructureWithBufferPolicy->needsToRunGC();
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
}
void Dictionary::getProperty(const char *const query, char *const outResult,

View file

@ -81,7 +81,7 @@ class Dictionary {
void flushWithGC(const char *const filePath);
bool needsToRunGC();
bool needsToRunGC(const bool mindsBlockByGC);
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;

View file

@ -78,7 +78,7 @@ class DictionaryStructureWithBufferPolicy {
virtual void flushWithGC(const char *const filePath) = 0;
virtual bool needsToRunGC() const = 0;
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
virtual void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const = 0;

View file

@ -41,7 +41,7 @@ bool DynamicPatriciaTrieGcEventListeners
return false;
}
} else {
valueStack.back() += 1;
mValueStack.back() += 1;
if (node->isTerminal()) {
mValidUnigramCount += 1;
}
@ -49,6 +49,23 @@ bool DynamicPatriciaTrieGcEventListeners
return true;
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (!node->isDeleted()) {
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
}
}
return true;
}
// Writes dummy PtNode array size when the head of PtNode array is read.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) {

View file

@ -40,22 +40,22 @@ class DynamicPatriciaTrieGcEventListeners {
// Stores the given writing helper and buffer pointers and starts with an empty value stack,
// a children value of 0 and a valid unigram count of 0.
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer)
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
: mWritingHelper(writingHelper), mBuffer(buffer), mValueStack(),
mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
// Pops the top of the value stack into mChildrenValue.
// Returns false when the stack is empty, i.e. there is no entry pushed by onDescend() left
// to pop; returns true otherwise.
bool onAscend() {
if (valueStack.empty()) {
if (mValueStack.empty()) {
return false;
}
mChildrenValue = valueStack.back();
valueStack.pop_back();
mChildrenValue = mValueStack.back();
mValueStack.pop_back();
return true;
}
// Pushes a new entry initialized to 0 onto the value stack. ptNodeArrayPos is not used
// here. Always returns true.
bool onDescend(const int ptNodeArrayPos) {
valueStack.push_back(0);
mValueStack.push_back(0);
return true;
}
@ -74,7 +74,7 @@ class DynamicPatriciaTrieGcEventListeners {
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
std::vector<int> valueStack;
std::vector<int> mValueStack;
int mChildrenValue;
int mValidUnigramCount;
};
@ -94,20 +94,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (!node->isDeleted()) {
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
}
}
return true;
}
const int *const nodeCodePoints);
int getValidBigramEntryCount() const {
return mValidBigramEntryCount;

View file

@ -291,7 +291,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
bool DynamicPatriciaTriePolicy::needsToRunGC() const {
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffer->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;

View file

@ -91,7 +91,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
void flushWithGC(const char *const filePath);
bool needsToRunGC() const;
bool needsToRunGC(const bool mindsBlockByGC) const;
void getProperty(const char *const query, char *const outResult,
const int maxResultLength) const;

View file

@ -147,7 +147,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize();
mBuffer->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
unigramCount, bigramCount, extendedRegionSize)) {
return;

View file

@ -20,7 +20,8 @@ namespace latinime {
// Note that there are corresponding definitions on the Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
// TODO: Change attribute string to "IS_DECAYING_DICT".
// For now IS_DECAYING_DICT_KEY still maps to the "USES_FORGETTING_CURVE" attribute string.
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";

View file

@ -36,8 +36,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
@ -54,8 +54,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
@ -82,8 +82,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
return mMultiWordCostMultiplier;
}
// Returns the boolean header attribute read through IS_DECAYING_DICT_KEY at construction
// time (false when the attribute is absent).
AK_FORCE_INLINE bool usesForgettingCurve() const {
return mUsesForgettingCurve;
AK_FORCE_INLINE bool isDecayingDict() const {
return mIsDecayingDict;
}
AK_FORCE_INLINE int getLastUpdatedTime() const {
@ -113,7 +113,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
static const char *const USES_FORGETTING_CURVE_KEY;
static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
static const char *const UNIGRAM_COUNT_KEY;
static const char *const BIGRAM_COUNT_KEY;
@ -126,7 +126,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
const bool mUsesForgettingCurve;
const bool mIsDecayingDict;
const int mLastUpdatedTime;
const int mUnigramCount;
const int mBigramCount;

View file

@ -107,7 +107,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
}
bool needsToRunGC() const {
bool needsToRunGC(const bool mindsBlockByGC) const {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;

View file

@ -42,6 +42,10 @@ class BufferWithExtendableBuffer {
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
// Returns the used size of the additional buffer, i.e. the part of the tail position that
// lies beyond the original buffer size.
AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
    return this->mUsedAdditionalBufferSize;
}
/**
* For reading.
*/

View file

@ -96,7 +96,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
fclose(file);
return false;
}
const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
additionalBufSize, 1, file) < 1) {
fclose(file);