Merge "Refactoring to extend probability field."

This commit is contained in:
Keisuke Kuroyanagi 2013-11-28 09:17:33 +00:00 committed by Android (Google) Code Review
commit c7dd2eb7ad
13 changed files with 296 additions and 200 deletions

View file

@ -57,6 +57,9 @@ class PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos) = 0;
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram) = 0;

View file

@ -23,6 +23,7 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@ -105,9 +106,11 @@ bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
newProbability);
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
newProbability, &probabilityFieldPos);
probabilityToWrite, &probabilityFieldPos);
}
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
@ -119,67 +122,24 @@ bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
ptNodeParams->getProbability(), ptNodeWritingPos)) {
return false;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getShortcutPos();
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams,
0 /* outProbabilityFieldPos */, ptNodeWritingPos);
}
bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
int probabilityFieldPos = NOT_A_DICT_POS;
if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
ptNodeWritingPos)) {
return false;
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getBigramsPos();
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
if (probabilityFieldPos == NOT_A_DICT_POS) {
return false;
}
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(),
ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */,
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */,
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
const int probabilityToWrite = getUpdatedProbability(
NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability());
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
probabilityToWrite, &probabilityFieldPos);
}
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
@ -289,4 +249,90 @@ bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields(
return true;
}
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (ptNodeParams->isTerminal()) {
if (outProbabilityFieldPos) {
*outProbabilityFieldPos = *ptNodeWritingPos;
}
if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) {
// Write a dummy probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
0 /* probability */, ptNodeWritingPos)) {
return false;
}
} else {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
ptNodeParams->getProbability(), ptNodeWritingPos)) {
return false;
}
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getShortcutPos();
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
ptNodeWritingPos)) {
return false;
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getBigramsPos();
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
return false;
}
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(),
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */,
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
const int newProbability) const {
if (mNeedsToDecayWhenUpdating) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
}
}
}

View file

@ -39,9 +39,10 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
const DynamicPatriciaTrieNodeReader *const ptNodeReader,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy)
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
virtual ~DynamicPatriciaTrieNodeWriter() {}
@ -59,6 +60,9 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram);
@ -76,6 +80,12 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
int *const ptNodeWritingPos);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
static const int CHILDREN_POSITION_FIELD_SIZE;
BufferWithExtendableBuffer *const mBuffer;
@ -83,7 +93,7 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper;
DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -231,8 +231,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
}
@ -246,8 +246,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
&mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
&mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
mNeedsToDecayForTesting = false;
}

View file

@ -49,9 +49,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mHeaderPolicy.isDecayingDict()),
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
&mShortcutListPolicy),
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()),
&mShortcutListPolicy, mHeaderPolicy.isDecayingDict()),
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}

View file

@ -22,7 +22,6 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@ -53,9 +52,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
probability),
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability,
wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
@ -66,8 +63,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
}
if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
}
@ -83,8 +79,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
*outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos);
}
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -124,19 +119,18 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability.
*outAddedNewUnigram = false;
const int probabilityToWrite = getUpdatedProbability(
originalPtNodeParams->getProbability(), probability);
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability);
} else {
// Make the node terminal and write the probability.
*outAddedNewUnigram = true;
const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
originalPtNodeParams->getCodePoints(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false;
}
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@ -165,9 +159,10 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
1 /* arraySize */, &writingPos)) {
return false;
}
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false;
}
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@ -194,13 +189,22 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), newProbability));
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
return false;
}
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false;
}
}
const int actualChildrenPos = writingPos;
// Create new children PtNode array.
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
@ -211,7 +215,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
reallocatingPtNodeParams->getProbability()));
@ -219,10 +223,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return false;
}
if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
&writingPos)) {
return false;
}
}
@ -242,22 +247,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
}
int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) const {
if (mNeedsToDecay) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
}
}
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const int parentPos,
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
probability != NOT_A_PROBABILITY /* isTerminal */,
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
@ -265,11 +259,10 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
}
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
const int parentPos, const int codePointCount, const int *const codePoints,
const int probability) const {
const bool isTerminal, const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
false /* isBlacklisted */, false /* isNotAWord */,
probability != NOT_A_PROBABILITY /* isTerminal */,
false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
false /* hasShortcutTargets */, false /* hasBigrams */,
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);

View file

@ -34,10 +34,8 @@ class PtNodeWriter;
class DynamicPatriciaTrieUpdatingHelper {
public:
DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
const bool needsToDecay)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
mNeedsToDecay(needsToDecay) {}
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
~DynamicPatriciaTrieUpdatingHelper() {}
@ -61,7 +59,6 @@ class DynamicPatriciaTrieUpdatingHelper {
BufferWithExtendableBuffer *const mBuffer;
const PtNodeReader *const mPtNodeReader;
PtNodeWriter *const mPtNodeWriter;
const bool mNeedsToDecay;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
@ -80,14 +77,12 @@ class DynamicPatriciaTrieUpdatingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const int parentPos, const int codePointCount, const int *const codePoints,
const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
const bool isTerminal, const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */

View file

@ -76,11 +76,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy,
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay);
headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
@ -92,7 +94,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(mPtNodeWriter);
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) {
return false;
@ -106,7 +108,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy,
mShortcutPolicy);
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
&dictPositionRelocationMap);
@ -124,7 +126,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&newDictShortcutPolicy);
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader,
&newDictBigramPolicy, &newDictShortcutPolicy);
&newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap);

View file

@ -29,8 +29,6 @@ class DynamicBigramListPolicy;
class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy;
class HeaderPolicy;
class PtNodeReader;
class PtNodeWriter;
// TODO: Make it independent from a particular format and move to pt_common.
class DynamicPatriciaTrieWritingHelper {
@ -38,11 +36,9 @@ class DynamicPatriciaTrieWritingHelper {
static const size_t MAX_DICTIONARY_SIZE;
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecay(needsToDecay) {}
~DynamicPatriciaTrieWritingHelper() {}
@ -57,8 +53,6 @@ class DynamicPatriciaTrieWritingHelper {
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
BufferWithExtendableBuffer *const mBuffer;
const PtNodeReader *const mPtNodeReader;
PtNodeWriter *const mPtNodeWriter;
DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecay;

View file

@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
@ -115,8 +116,10 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
newProbability);
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
toBeUpdatedPtNodeParams->getTerminalId(), newProbability);
toBeUpdatedPtNodeParams->getTerminalId(), probabilityToWrite);
}
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
@ -134,67 +137,23 @@ bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBe
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
ptNodeWritingPos);
}
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
terminalId = ptNodeParams->getTerminalId();
} else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
// Write terminal information using a new terminal id.
// Get a new unused terminal id.
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
}
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}
// Write terminal Id.
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
ptNodeWritingPos)) {
return false;
}
// Write probability.
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
terminalId, ptNodeParams->getProbability())) {
return false;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
const int probabilityToWrite = getUpdatedProbability(NOT_A_PROBABILITY,
ptNodeParams->getProbability());
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(terminalId,
probabilityToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
@ -258,4 +217,85 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
return true;
}
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
terminalId = ptNodeParams->getTerminalId();
} else if (ptNodeParams->isTerminal()) {
// Write terminal information using a new terminal id.
// Get a new unused terminal id.
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
}
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}
// Write terminal Id.
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
return false;
}
// Write probability.
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
terminalId, ptNodeParams->getProbability())) {
return false;
}
}
if (outTerminalId) {
*outTerminalId = terminalId;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
int Ver4PatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
const int newProbability) const {
if (mNeedsToDecayWhenUpdating) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
}
}
}

View file

@ -39,10 +39,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy,
const bool needsToDecayWhenUpdating)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -63,6 +65,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram);
@ -80,6 +85,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
bool writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
static const int CHILDREN_POSITION_FIELD_SIZE;
BufferWithExtendableBuffer *const mTrieBuffer;
@ -88,6 +99,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
};
} // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -49,9 +49,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()),
&mShortcutPolicy, mHeaderPolicy.isDecayingDict()),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {};

View file

@ -87,7 +87,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
@ -121,7 +122,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@ -139,7 +141,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
&newPtNodeReader);