am 2fed447b
: Merge "Refactoring: Get header policy from Ver4DictBuffers."
* commit '2fed447b8e9d3e43ba3bc9e7e5fc4ce7093dd7d0': Refactoring: Get header policy from Ver4DictBuffers.
This commit is contained in:
commit
205fb5a906
5 changed files with 30 additions and 29 deletions
|
@ -58,6 +58,10 @@ class Ver4DictBuffers {
|
||||||
|| mShortcutDictContent.isNearSizeLimit();
|
|| mShortcutDictContent.isNearSizeLimit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
|
||||||
|
return &mHeaderPolicy;
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
|
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
|
||||||
return &mExpandableHeaderBuffer;
|
return &mExpandableHeaderBuffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||||
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
|
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
|
||||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
||||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
||||||
// valid terminal DicNode.
|
// valid terminal DicNode.
|
||||||
|
@ -85,7 +85,7 @@ int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inW
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
const int bigramProbability) const {
|
const int bigramProbability) const {
|
||||||
if (mHeaderPolicy.isDecayingDict()) {
|
if (mHeaderPolicy->isDecayingDict()) {
|
||||||
// Both probabilities are encoded. Decode them and get probability.
|
// Both probabilities are encoded. Decode them and get probability.
|
||||||
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
||||||
} else {
|
} else {
|
||||||
|
@ -229,7 +229,7 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mWritingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
|
@ -237,10 +237,10 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const bool needsToDecay = mHeaderPolicy.isDecayingDict()
|
const bool needsToDecay = mHeaderPolicy->isDecayingDict()
|
||||||
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||||
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, mHeaderPolicy));
|
||||||
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy, needsToDecay);
|
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, needsToDecay);
|
||||||
mNeedsToDecayForTesting = false;
|
mNeedsToDecayForTesting = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,7 +252,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
if (mBuffers.get()->isNearSizeLimit()) {
|
if (mBuffers.get()->isNearSizeLimit()) {
|
||||||
// Additional buffer size is near the limit.
|
// Additional buffer size is near the limit.
|
||||||
return true;
|
return true;
|
||||||
} else if (mHeaderPolicy.getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
|
} else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
|
||||||
> Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
|
> Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
|
||||||
// Total extended region size of the trie exceeds the limit.
|
// Total extended region size of the trie exceeds the limit.
|
||||||
return true;
|
return true;
|
||||||
|
@ -260,9 +260,9 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
&& mDictBuffer->getUsedAdditionalBufferSize() > 0) {
|
&& mDictBuffer->getUsedAdditionalBufferSize() > 0) {
|
||||||
// Needs to reduce dictionary size.
|
// Needs to reduce dictionary size.
|
||||||
return true;
|
return true;
|
||||||
} else if (mHeaderPolicy.isDecayingDict()) {
|
} else if (mHeaderPolicy->isDecayingDict()) {
|
||||||
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||||
mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
|
mindsBlockByGC, mUnigramCount, mBigramCount, mHeaderPolicy);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -276,11 +276,11 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
||||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||||
snprintf(outResult, maxResultLength, "%d",
|
snprintf(outResult, maxResultLength, "%d",
|
||||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
|
mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
|
||||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||||
snprintf(outResult, maxResultLength, "%d",
|
snprintf(outResult, maxResultLength, "%d",
|
||||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
|
mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
|
||||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||||
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
|
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
|
||||||
mNeedsToDecayForTesting = true;
|
mNeedsToDecayForTesting = true;
|
||||||
|
|
|
@ -38,22 +38,20 @@ class DicNodeVector;
|
||||||
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
|
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
|
||||||
: mBuffers(buffers),
|
: mBuffers(buffers), mHeaderPolicy(mBuffers.get()->getHeaderPolicy()),
|
||||||
mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer(
|
|
||||||
false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
|
|
||||||
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
|
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
|
||||||
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable(), &mHeaderPolicy,
|
mBuffers.get()->getTerminalPositionLookupTable(), mHeaderPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy->isDecayingDict()),
|
||||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||||
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
||||||
&mShortcutPolicy, mHeaderPolicy.isDecayingDict()),
|
&mShortcutPolicy, mHeaderPolicy->isDecayingDict()),
|
||||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||||
mWritingHelper(mBuffers.get()),
|
mWritingHelper(mBuffers.get()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {};
|
mBigramCount(mHeaderPolicy->getBigramCount()), mNeedsToDecayForTesting(false) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -78,7 +76,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||||
return &mHeaderPolicy;
|
return mHeaderPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
||||||
|
@ -121,7 +119,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer *const mDictBuffer;
|
BufferWithExtendableBuffer *const mDictBuffer;
|
||||||
Ver4BigramListPolicy mBigramPolicy;
|
Ver4BigramListPolicy mBigramPolicy;
|
||||||
Ver4ShortcutListPolicy mShortcutPolicy;
|
Ver4ShortcutListPolicy mShortcutPolicy;
|
||||||
|
|
|
@ -32,8 +32,8 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePath,
|
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePath,
|
||||||
const HeaderPolicy *const headerPolicy, const int unigramCount,
|
const int unigramCount, const int bigramCount) const {
|
||||||
const int bigramCount) const {
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||||
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
|
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
|
||||||
char dirPath[dirPathBufSize];
|
char dirPath[dirPathBufSize];
|
||||||
FileUtils::getDirPath(trieFilePath, dirPathBufSize, dirPath);
|
FileUtils::getDirPath(trieFilePath, dirPathBufSize, dirPath);
|
||||||
|
@ -53,8 +53,8 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePa
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||||
const char *const trieFilePath, const HeaderPolicy *const headerPolicy,
|
const char *const trieFilePath, const bool needsToDecay) {
|
||||||
const bool needsToDecay) {
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
||||||
Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
|
Ver4DictBuffers::createVer4DictBuffers(headerPolicy));
|
||||||
int unigramCount = 0;
|
int unigramCount = 0;
|
||||||
|
|
|
@ -32,12 +32,11 @@ class Ver4PatriciaTrieWritingHelper {
|
||||||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||||
: mBuffers(buffers) {}
|
: mBuffers(buffers) {}
|
||||||
|
|
||||||
void writeToDictFile(const char *const trieFilePath, const HeaderPolicy *const headerPolicy,
|
void writeToDictFile(const char *const trieFilePath, const int unigramCount,
|
||||||
const int unigramCount, const int bigramCount) const;
|
const int bigramCount) const;
|
||||||
|
|
||||||
void writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
void writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||||
const char *const trieFilePath, const HeaderPolicy *const headerPolicy,
|
const char *const trieFilePath, const bool needsToDecay);
|
||||||
const bool needsToDecay);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
||||||
|
|
Loading…
Reference in a new issue