* commit '0aaf42b4caa6956c23462a36a8278e9acb8514b1': Prepare dictionary decay.
This commit is contained in:
commit
4c677d7b8f
16 changed files with 62 additions and 47 deletions
|
@ -115,7 +115,7 @@ public final class BinaryDictionary extends Dictionary {
|
|||
private static native long openNative(String sourceDir, long dictOffset, long dictSize,
|
||||
boolean isUpdatable);
|
||||
private static native void flushNative(long dict, String filePath);
|
||||
private static native boolean needsToRunGCNative(long dict);
|
||||
private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
|
||||
private static native void flushWithGCNative(long dict, String filePath);
|
||||
private static native void closeNative(long dict);
|
||||
private static native int getProbabilityNative(long dict, int[] word);
|
||||
|
@ -270,7 +270,7 @@ public final class BinaryDictionary extends Dictionary {
|
|||
}
|
||||
|
||||
private void runGCIfRequired() {
|
||||
if (needsToRunGCNative(mNativeDict)) {
|
||||
if (needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
flushWithGC();
|
||||
}
|
||||
}
|
||||
|
@ -326,9 +326,15 @@ public final class BinaryDictionary extends Dictionary {
|
|||
reopen();
|
||||
}
|
||||
|
||||
public boolean needsToRunGC() {
|
||||
/**
|
||||
* Checks whether GC is needed to run or not.
|
||||
* @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
|
||||
* the blocking in some situations such as in idle time or just before closing.
|
||||
* @return whether GC is needed to run or not.
|
||||
*/
|
||||
public boolean needsToRunGC(final boolean mindsBlockByGC) {
|
||||
if (!isValidDictionary()) return false;
|
||||
return needsToRunGCNative(mNativeDict);
|
||||
return needsToRunGCNative(mNativeDict, mindsBlockByGC);
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
|
|
|
@ -505,7 +505,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
|
||||
} else {
|
||||
if (mBinaryDictionary.needsToRunGC()) {
|
||||
if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
|
||||
mBinaryDictionary.flushWithGC();
|
||||
} else {
|
||||
mBinaryDictionary.flush();
|
||||
|
|
|
@ -113,10 +113,10 @@ static void latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dic
|
|||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
|
||||
jlong dict) {
|
||||
jlong dict, jboolean mindsBlockByGC) {
|
||||
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
|
||||
if (!dictionary) return false;
|
||||
return dictionary->needsToRunGC();
|
||||
return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
|
||||
}
|
||||
|
||||
static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -364,7 +364,7 @@ static const JNINativeMethod sMethods[] = {
|
|||
},
|
||||
{
|
||||
const_cast<char *>("needsToRunGCNative"),
|
||||
const_cast<char *>("(J)Z"),
|
||||
const_cast<char *>("(JZ)Z"),
|
||||
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
|
||||
},
|
||||
{
|
||||
|
|
|
@ -123,8 +123,8 @@ void Dictionary::flushWithGC(const char *const filePath) {
|
|||
mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
|
||||
}
|
||||
|
||||
bool Dictionary::needsToRunGC() {
|
||||
return mDictionaryStructureWithBufferPolicy->needsToRunGC();
|
||||
bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
|
||||
return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
|
||||
}
|
||||
|
||||
void Dictionary::getProperty(const char *const query, char *const outResult,
|
||||
|
|
|
@ -81,7 +81,7 @@ class Dictionary {
|
|||
|
||||
void flushWithGC(const char *const filePath);
|
||||
|
||||
bool needsToRunGC();
|
||||
bool needsToRunGC(const bool mindsBlockByGC);
|
||||
|
||||
void getProperty(const char *const query, char *const outResult,
|
||||
const int maxResultLength) const;
|
||||
|
|
|
@ -78,7 +78,7 @@ class DictionaryStructureWithBufferPolicy {
|
|||
|
||||
virtual void flushWithGC(const char *const filePath) = 0;
|
||||
|
||||
virtual bool needsToRunGC() const = 0;
|
||||
virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
|
||||
|
||||
virtual void getProperty(const char *const query, char *const outResult,
|
||||
const int maxResultLength) const = 0;
|
||||
|
|
|
@ -41,7 +41,7 @@ bool DynamicPatriciaTrieGcEventListeners
|
|||
return false;
|
||||
}
|
||||
} else {
|
||||
valueStack.back() += 1;
|
||||
mValueStack.back() += 1;
|
||||
if (node->isTerminal()) {
|
||||
mValidUnigramCount += 1;
|
||||
}
|
||||
|
@ -49,6 +49,23 @@ bool DynamicPatriciaTrieGcEventListeners
|
|||
return true;
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
if (!node->isDeleted()) {
|
||||
int pos = node->getBigramsPos();
|
||||
if (pos != NOT_A_DICT_POS) {
|
||||
int bigramEntryCount = 0;
|
||||
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
|
||||
&bigramEntryCount)) {
|
||||
return false;
|
||||
}
|
||||
mValidBigramEntryCount += bigramEntryCount;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes dummy PtNode array size when the head of PtNode array is read.
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
::onDescend(const int ptNodeArrayPos) {
|
||||
|
|
|
@ -40,22 +40,22 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||
BufferWithExtendableBuffer *const buffer)
|
||||
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
|
||||
: mWritingHelper(writingHelper), mBuffer(buffer), mValueStack(),
|
||||
mChildrenValue(0), mValidUnigramCount(0) {}
|
||||
|
||||
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
||||
|
||||
bool onAscend() {
|
||||
if (valueStack.empty()) {
|
||||
if (mValueStack.empty()) {
|
||||
return false;
|
||||
}
|
||||
mChildrenValue = valueStack.back();
|
||||
valueStack.pop_back();
|
||||
mChildrenValue = mValueStack.back();
|
||||
mValueStack.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool onDescend(const int ptNodeArrayPos) {
|
||||
valueStack.push_back(0);
|
||||
mValueStack.push_back(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
|
||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
std::vector<int> valueStack;
|
||||
std::vector<int> mValueStack;
|
||||
int mChildrenValue;
|
||||
int mValidUnigramCount;
|
||||
};
|
||||
|
@ -94,20 +94,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
bool onReadingPtNodeArrayTail() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
if (!node->isDeleted()) {
|
||||
int pos = node->getBigramsPos();
|
||||
if (pos != NOT_A_DICT_POS) {
|
||||
int bigramEntryCount = 0;
|
||||
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
|
||||
&bigramEntryCount)) {
|
||||
return false;
|
||||
}
|
||||
mValidBigramEntryCount += bigramEntryCount;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
const int *const nodeCodePoints);
|
||||
|
||||
int getValidBigramEntryCount() const {
|
||||
return mValidBigramEntryCount;
|
||||
|
|
|
@ -291,7 +291,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
|||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::needsToRunGC() const {
|
||||
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||
if (!mBuffer->isUpdatable()) {
|
||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
|
|
@ -91,7 +91,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
void flushWithGC(const char *const filePath);
|
||||
|
||||
bool needsToRunGC() const;
|
||||
bool needsToRunGC(const bool mindsBlockByGC) const;
|
||||
|
||||
void getProperty(const char *const query, char *const outResult,
|
||||
const int maxResultLength) const;
|
||||
|
|
|
@ -147,7 +147,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
|
|||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
||||
mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize();
|
||||
mBuffer->getUsedAdditionalBufferSize();
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||
unigramCount, bigramCount, extendedRegionSize)) {
|
||||
return;
|
||||
|
|
|
@ -20,7 +20,8 @@ namespace latinime {
|
|||
|
||||
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
|
||||
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
|
||||
const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
|
||||
// TODO: Change attribute string to "IS_DECAYING_DICT".
|
||||
const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
|
||||
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
|
||||
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
|
||||
|
|
|
@ -36,8 +36,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
|
||||
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
|
||||
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
|
||||
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
|
||||
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
|
||||
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
|
||||
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
||||
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
||||
mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
|
@ -54,8 +54,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
|
||||
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
|
||||
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
|
||||
mUsesForgettingCurve(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
|
||||
USES_FORGETTING_CURVE_KEY, false /* defaultValue */)),
|
||||
mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
|
||||
IS_DECAYING_DICT_KEY, false /* defaultValue */)),
|
||||
mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
|
||||
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
|
||||
|
@ -82,8 +82,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
return mMultiWordCostMultiplier;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool usesForgettingCurve() const {
|
||||
return mUsesForgettingCurve;
|
||||
AK_FORCE_INLINE bool isDecayingDict() const {
|
||||
return mIsDecayingDict;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getLastUpdatedTime() const {
|
||||
|
@ -113,7 +113,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
|
||||
|
||||
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
|
||||
static const char *const USES_FORGETTING_CURVE_KEY;
|
||||
static const char *const IS_DECAYING_DICT_KEY;
|
||||
static const char *const LAST_UPDATED_TIME_KEY;
|
||||
static const char *const UNIGRAM_COUNT_KEY;
|
||||
static const char *const BIGRAM_COUNT_KEY;
|
||||
|
@ -126,7 +126,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
const int mSize;
|
||||
HeaderReadWriteUtils::AttributeMap mAttributeMap;
|
||||
const float mMultiWordCostMultiplier;
|
||||
const bool mUsesForgettingCurve;
|
||||
const bool mIsDecayingDict;
|
||||
const int mLastUpdatedTime;
|
||||
const int mUnigramCount;
|
||||
const int mBigramCount;
|
||||
|
|
|
@ -107,7 +107,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||
}
|
||||
|
||||
bool needsToRunGC() const {
|
||||
bool needsToRunGC(const bool mindsBlockByGC) const {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
|
|
@ -42,6 +42,10 @@ class BufferWithExtendableBuffer {
|
|||
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
|
||||
return mUsedAdditionalBufferSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* For reading.
|
||||
*/
|
||||
|
|
|
@ -96,7 +96,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
fclose(file);
|
||||
return false;
|
||||
}
|
||||
const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
|
||||
const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
|
||||
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
|
||||
additionalBufSize, 1, file) < 1) {
|
||||
fclose(file);
|
||||
|
|
Loading…
Reference in a new issue