Merge "Refactoring to extend probability field."
This commit is contained in:
commit
c7dd2eb7ad
13 changed files with 296 additions and 200 deletions
|
@ -57,6 +57,9 @@ class PtNodeWriter {
|
|||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos) = 0;
|
||||
|
||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos) = 0;
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||
bool *const outAddedNewBigram) = 0;
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -105,9 +106,11 @@ bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
|
|||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
return false;
|
||||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
|
||||
newProbability);
|
||||
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
|
||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
newProbability, &probabilityFieldPos);
|
||||
probabilityToWrite, &probabilityFieldPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
|
||||
|
@ -119,67 +122,24 @@ bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
|
|||
|
||||
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability when the probability is a valid probability, which means this node is
|
||||
// terminal.
|
||||
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getProbability(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
||||
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getShortcutPos();
|
||||
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
|
||||
return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams,
|
||||
0 /* outProbabilityFieldPos */, ptNodeWritingPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
int probabilityFieldPos = NOT_A_DICT_POS;
|
||||
if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||
int bigramCount = 0;
|
||||
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getBigramsPos();
|
||||
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
|
||||
if (probabilityFieldPos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(),
|
||||
ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */,
|
||||
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||
bigramCount > 0 /* hasBigrams */,
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
const int probabilityToWrite = getUpdatedProbability(
|
||||
NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability());
|
||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
probabilityToWrite, &probabilityFieldPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
|
@ -289,4 +249,90 @@ bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields(
|
|||
return true;
|
||||
}
|
||||
|
||||
// Writes one PtNode starting at *ptNodeWritingPos and advances *ptNodeWritingPos past every
// field it writes. Field order: placeholder flags, parent position offset, code points,
// probability (terminal nodes only), children position, copied shortcut list, copied bigram
// list. The final flags are computed last and written back at the node's start position.
//
// ptNodeParams: source of all values written for the node.
// outProbabilityFieldPos: may be null; when non-null and the node is terminal, receives the
//     position of the probability field. It is left untouched for non-terminal nodes.
// ptNodeWritingPos: in/out writing position.
// Returns false as soon as any write or copy step fails, true otherwise.
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
||||
int *const ptNodeWritingPos) {
|
||||
// Remember where the node starts; needed for the parent offset and the final flags write.
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write the probability field whenever this node is terminal, even if it does not carry a
|
||||
// valid probability yet (a placeholder value is written in that case).
|
||||
if (ptNodeParams->isTerminal()) {
|
||||
// Report the field position before writing so the caller can overwrite the value later.
if (outProbabilityFieldPos) {
|
||||
*outProbabilityFieldPos = *ptNodeWritingPos;
|
||||
}
|
||||
if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) {
|
||||
// Write a dummy probability.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
0 /* probability */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getProbability(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Copy the shortcut list when the original shortcut list position is a valid dictionary
// position.
|
||||
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getShortcutPos();
|
||||
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Copy the bigram list when the original bigram list position is a valid dictionary position.
|
||||
// bigramCount stays 0 when there is no list to copy; it drives the hasBigrams flag below.
int bigramCount = 0;
|
||||
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getBigramsPos();
|
||||
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(),
|
||||
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||
bigramCount > 0 /* hasBigrams */,
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
// Use a local copy of the node start position so that overwriting the dummy flags does not
// disturb *ptNodeWritingPos, which already points past the whole node.
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the probability value to store for an entry.
// When mNeedsToDecayWhenUpdating is set, the result is whatever
// ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability)
// yields; otherwise newProbability is returned unchanged and originalProbability is ignored.
int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) const {
|
||||
if (mNeedsToDecayWhenUpdating) {
|
||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
newProbability);
|
||||
} else {
|
||||
return newProbability;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -39,9 +39,10 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
|
||||
const DynamicPatriciaTrieNodeReader *const ptNodeReader,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
DynamicShortcutListPolicy *const shortcutPolicy)
|
||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating)
|
||||
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
|
||||
|
||||
virtual ~DynamicPatriciaTrieNodeWriter() {}
|
||||
|
||||
|
@ -59,6 +60,9 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||
bool *const outAddedNewBigram);
|
||||
|
@ -76,6 +80,12 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
||||
|
||||
bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||
|
||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
|
@ -83,7 +93,7 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||
|
||||
const bool mNeedsToDecayWhenUpdating;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */
|
||||
|
|
|
@ -231,8 +231,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
|||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||
}
|
||||
|
||||
|
@ -246,8 +246,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
|||
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
||||
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
||||
&mShortcutListPolicy, needsToDecay);
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||
&mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||
mNeedsToDecayForTesting = false;
|
||||
}
|
||||
|
|
|
@ -49,9 +49,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mHeaderPolicy.isDecayingDict()),
|
||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
||||
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
||||
&mShortcutListPolicy),
|
||||
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter,
|
||||
mHeaderPolicy.isDecayingDict()),
|
||||
&mShortcutListPolicy, mHeaderPolicy.isDecayingDict()),
|
||||
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter),
|
||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -53,9 +52,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
|||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||
wordCodePoints[matchedCodePointCount + j])) {
|
||||
*outAddedNewUnigram = true;
|
||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
|
||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
||||
probability),
|
||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability,
|
||||
wordCodePoints + matchedCodePointCount,
|
||||
codePointCount - matchedCodePointCount);
|
||||
}
|
||||
|
@ -66,8 +63,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
|||
}
|
||||
if (!ptNodeParams.hasChildren()) {
|
||||
*outAddedNewUnigram = true;
|
||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability,
|
||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||
}
|
||||
|
@ -83,8 +79,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
|||
*outAddedNewUnigram = true;
|
||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
|
||||
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||
|
@ -124,19 +119,18 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
|||
if (originalPtNodeParams->isTerminal()) {
|
||||
// Overwrites the probability.
|
||||
*outAddedNewUnigram = false;
|
||||
const int probabilityToWrite = getUpdatedProbability(
|
||||
originalPtNodeParams->getProbability(), probability);
|
||||
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
|
||||
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability);
|
||||
} else {
|
||||
// Make the node terminal and write the probability.
|
||||
*outAddedNewUnigram = true;
|
||||
const int movedPos = mBuffer->getTailPosition();
|
||||
int writingPos = movedPos;
|
||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
||||
originalPtNodeParams->getCodePoints(),
|
||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
|
||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
||||
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
|
||||
probability));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
||||
|
@ -165,9 +159,10 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
|||
1 /* arraySize */, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||
|
@ -194,13 +189,22 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
int writingPos = firstPartOfReallocatedPtNodePos;
|
||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||
// with actual children position.
|
||||
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||
if (addsExtraChild) {
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
|
||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints(), newProbability));
|
||||
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
|
||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const int actualChildrenPos = writingPos;
|
||||
// Create new children PtNode array.
|
||||
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
|
||||
|
@ -211,7 +215,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
// Write the 2nd part of the reallocating node.
|
||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||
firstPartOfReallocatedPtNodePos,
|
||||
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||
reallocatingPtNodeParams->getProbability()));
|
||||
|
@ -219,10 +223,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
return false;
|
||||
}
|
||||
if (addsExtraChild) {
|
||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
|
||||
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
||||
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
|
||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -242,22 +247,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
|
||||
}
|
||||
|
||||
int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) const {
|
||||
if (mNeedsToDecay) {
|
||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
newProbability);
|
||||
} else {
|
||||
return newProbability;
|
||||
}
|
||||
}
|
||||
|
||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
||||
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
||||
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
|
||||
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
|
||||
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
|
||||
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
|
||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
||||
|
@ -265,11 +259,10 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
|||
}
|
||||
|
||||
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
|
||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||
const int probability) const {
|
||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
||||
const int *const codePoints, const int probability) const {
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||
false /* isBlacklisted */, false /* isNotAWord */,
|
||||
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||
false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
|
||||
false /* hasShortcutTargets */, false /* hasBigrams */,
|
||||
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
||||
|
|
|
@ -34,10 +34,8 @@ class PtNodeWriter;
|
|||
class DynamicPatriciaTrieUpdatingHelper {
|
||||
public:
|
||||
DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer,
|
||||
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
|
||||
const bool needsToDecay)
|
||||
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
|
||||
mNeedsToDecay(needsToDecay) {}
|
||||
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
|
||||
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
|
||||
|
||||
~DynamicPatriciaTrieUpdatingHelper() {}
|
||||
|
||||
|
@ -61,7 +59,6 @@ class DynamicPatriciaTrieUpdatingHelper {
|
|||
BufferWithExtendableBuffer *const mBuffer;
|
||||
const PtNodeReader *const mPtNodeReader;
|
||||
PtNodeWriter *const mPtNodeWriter;
|
||||
const bool mNeedsToDecay;
|
||||
|
||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||
|
@ -80,14 +77,12 @@ class DynamicPatriciaTrieUpdatingHelper {
|
|||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||
const int newNodeCodePointCount);
|
||||
|
||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||
|
||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||
const int probability) const;
|
||||
|
||||
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
|
||||
const bool isTerminal, const int parentPos, const int codePointCount,
|
||||
const int *const codePoints, const int probability) const;
|
||||
|
||||
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
|
||||
const int codePointCount, const int *const codePoints, const int probability) const;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
|
||||
|
|
|
@ -76,11 +76,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
int *const outUnigramCount, int *const outBigramCount) {
|
||||
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
|
||||
DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy,
|
||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners
|
||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay);
|
||||
headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||
return false;
|
||||
|
@ -92,7 +94,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||
traversePolicyToUpdateBigramProbability(mPtNodeWriter);
|
||||
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateBigramProbability)) {
|
||||
return false;
|
||||
|
@ -106,7 +108,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy,
|
||||
mShortcutPolicy);
|
||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
|
||||
&dictPositionRelocationMap);
|
||||
|
@ -124,7 +126,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
&newDictShortcutPolicy);
|
||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
||||
DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader,
|
||||
&newDictBigramPolicy, &newDictShortcutPolicy);
|
||||
&newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap);
|
||||
|
|
|
@ -29,8 +29,6 @@ class DynamicBigramListPolicy;
|
|||
class DynamicPatriciaTrieReadingHelper;
|
||||
class DynamicShortcutListPolicy;
|
||||
class HeaderPolicy;
|
||||
class PtNodeReader;
|
||||
class PtNodeWriter;
|
||||
|
||||
// TODO: Make it independent from a particular format and move to pt_common.
|
||||
class DynamicPatriciaTrieWritingHelper {
|
||||
|
@ -38,11 +36,9 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
static const size_t MAX_DICTIONARY_SIZE;
|
||||
|
||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
||||
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
mNeedsToDecay(needsToDecay) {}
|
||||
|
||||
~DynamicPatriciaTrieWritingHelper() {}
|
||||
|
@ -57,8 +53,6 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
const PtNodeReader *const mPtNodeReader;
|
||||
PtNodeWriter *const mPtNodeWriter;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||
const bool mNeedsToDecay;
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -115,8 +116,10 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
|
|||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
return false;
|
||||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
|
||||
newProbability);
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), newProbability);
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), probabilityToWrite);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
|
||||
|
@ -134,67 +137,23 @@ bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBe
|
|||
|
||||
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
|
||||
ptNodeWritingPos);
|
||||
}
|
||||
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
terminalId = ptNodeParams->getTerminalId();
|
||||
} else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||
// Write terminal information using a new terminal id.
|
||||
// Get a new unused terminal id.
|
||||
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
|
||||
}
|
||||
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (isTerminal) {
|
||||
// Update the lookup table.
|
||||
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||
terminalId, nodePos)) {
|
||||
return false;
|
||||
}
|
||||
// Write terminal Id.
|
||||
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
|
||||
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
|
||||
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability.
|
||||
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||
terminalId, ptNodeParams->getProbability())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(), isTerminal,
|
||||
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
const int probabilityToWrite = getUpdatedProbability(NOT_A_PROBABILITY,
|
||||
ptNodeParams->getProbability());
|
||||
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(terminalId,
|
||||
probabilityToWrite);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
|
@ -258,4 +217,85 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
||||
int *const ptNodeWritingPos) {
|
||||
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
terminalId = ptNodeParams->getTerminalId();
|
||||
} else if (ptNodeParams->isTerminal()) {
|
||||
// Write terminal information using a new terminal id.
|
||||
// Get a new unused terminal id.
|
||||
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
|
||||
}
|
||||
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (isTerminal) {
|
||||
// Update the lookup table.
|
||||
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||
terminalId, nodePos)) {
|
||||
return false;
|
||||
}
|
||||
// Write terminal Id.
|
||||
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
|
||||
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability.
|
||||
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||
terminalId, ptNodeParams->getProbability())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (outTerminalId) {
|
||||
*outTerminalId = terminalId;
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(), isTerminal,
|
||||
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int Ver4PatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) const {
|
||||
if (mNeedsToDecayWhenUpdating) {
|
||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
newProbability);
|
||||
} else {
|
||||
return newProbability;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -39,10 +39,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
public:
|
||||
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
||||
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
|
||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy,
|
||||
const bool needsToDecayWhenUpdating)
|
||||
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
|
||||
mReadingHelper(mTrieBuffer, mPtNodeReader),
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
|
||||
|
||||
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
||||
|
||||
|
@ -63,6 +65,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||
bool *const outAddedNewBigram);
|
||||
|
@ -80,6 +85,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
||||
|
||||
bool writePtNodeAndGetTerminalIdAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||
|
||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
|
||||
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||
|
@ -88,6 +99,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
||||
Ver4BigramListPolicy *const mBigramPolicy;
|
||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||
const bool mNeedsToDecayWhenUpdating;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
|
||||
|
|
|
@ -49,9 +49,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
||||
&mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
|
||||
mHeaderPolicy.isDecayingDict()),
|
||||
&mShortcutPolicy, mHeaderPolicy.isDecayingDict()),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {};
|
||||
|
|
|
@ -87,7 +87,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
|
||||
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
|
||||
false /* needsToDecayWhenUpdating */);
|
||||
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
|
@ -121,7 +122,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
|
||||
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
|
||||
false /* needsToDecayWhenUpdating */);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
||||
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
||||
|
@ -139,7 +141,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
|
||||
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
|
||||
false /* needsToDecayWhenUpdating */);
|
||||
|
||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
|
||||
&newPtNodeReader);
|
||||
|
|
Loading…
Reference in a new issue