Merge "Refactoring to extend probability field."

This commit is contained in:
Keisuke Kuroyanagi 2013-11-28 09:17:33 +00:00 committed by Android (Google) Code Review
commit c7dd2eb7ad
13 changed files with 296 additions and 200 deletions

View file

@ -57,6 +57,9 @@ class PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos) = 0; int *const ptNodeWritingPos) = 0;
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability, const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram) = 0; bool *const outAddedNewBigram) = 0;

View file

@ -23,6 +23,7 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -105,9 +106,11 @@ bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
if (!toBeUpdatedPtNodeParams->isTerminal()) { if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false; return false;
} }
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
newProbability);
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos(); int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
newProbability, &probabilityFieldPos); probabilityToWrite, &probabilityFieldPos);
} }
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition( bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
@ -119,67 +122,24 @@ bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos; return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams,
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the 0 /* outProbabilityFieldPos */, ptNodeWritingPos);
// PtNode writing. }
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) { bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
return false; const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
} int probabilityFieldPos = NOT_A_DICT_POS;
// Calculate a parent offset and write the offset. if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
ptNodeParams->getProbability(), ptNodeWritingPos)) {
return false;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getShortcutPos();
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
ptNodeWritingPos)) { ptNodeWritingPos)) {
return false; return false;
} }
} if (probabilityFieldPos == NOT_A_DICT_POS) {
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getBigramsPos();
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
return false; return false;
} }
} const int probabilityToWrite = getUpdatedProbability(
// Create node flags and write them. NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability());
PatriciaTrieReadingUtils::NodeFlags nodeFlags = return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), probabilityToWrite, &probabilityFieldPos);
ptNodeParams->isNotAWord(),
ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */,
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */,
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
} }
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry( bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
@ -289,4 +249,90 @@ bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields(
return true; return true;
} }
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
if (ptNodeParams->isTerminal()) {
if (outProbabilityFieldPos) {
*outProbabilityFieldPos = *ptNodeWritingPos;
}
if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) {
// Write a dummy probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
0 /* probability */, ptNodeWritingPos)) {
return false;
}
} else {
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
ptNodeParams->getProbability(), ptNodeWritingPos)) {
return false;
}
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getShortcutPos();
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
ptNodeWritingPos)) {
return false;
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
int fromPos = ptNodeParams->getBigramsPos();
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
return false;
}
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(),
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */,
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
// Chooses the probability value to store for a unigram.
// When this writer was constructed with needsToDecayWhenUpdating set, the value is obtained from
// ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability);
// otherwise newProbability is returned unchanged and originalProbability is ignored.
int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
        const int newProbability) const {
    if (!mNeedsToDecayWhenUpdating) {
        return newProbability;
    }
    return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
            newProbability);
}
} }

View file

@ -39,9 +39,10 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
const DynamicPatriciaTrieNodeReader *const ptNodeReader, const DynamicPatriciaTrieNodeReader *const ptNodeReader,
DynamicBigramListPolicy *const bigramPolicy, DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy) DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader), : mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
virtual ~DynamicPatriciaTrieNodeWriter() {} virtual ~DynamicPatriciaTrieNodeWriter() {}
@ -59,6 +60,9 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos); int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability, const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram); bool *const outAddedNewBigram);
@ -76,6 +80,12 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
int *const ptNodeWritingPos);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
static const int CHILDREN_POSITION_FIELD_SIZE; static const int CHILDREN_POSITION_FIELD_SIZE;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
@ -83,7 +93,7 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper; DynamicPatriciaTrieReadingHelper mReadingHelper;
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy; DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -231,8 +231,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary."); AKLOGI("Warning: flush() is called for non-updatable dictionary.");
return; return;
} }
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
} }
@ -246,8 +246,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
&mShortcutListPolicy, needsToDecay); &mShortcutListPolicy, needsToDecay);
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
mNeedsToDecayForTesting = false; mNeedsToDecayForTesting = false;
} }

View file

@ -49,9 +49,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mHeaderPolicy.isDecayingDict()), mHeaderPolicy.isDecayingDict()),
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy, mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
&mShortcutListPolicy), &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()),
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter, mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter),
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()), mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}

View file

@ -22,7 +22,6 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -53,9 +52,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
probability),
wordCodePoints + matchedCodePointCount, wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount); codePointCount - matchedCodePointCount);
} }
@ -66,8 +63,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
} }
if (!ptNodeParams.hasChildren()) { if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
} }
@ -83,8 +79,7 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos, return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(), wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos);
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
} }
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -124,19 +119,18 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
if (originalPtNodeParams->isTerminal()) { if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability. // Overwrites the probability.
*outAddedNewUnigram = false; *outAddedNewUnigram = false;
const int probabilityToWrite = getUpdatedProbability( return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability);
originalPtNodeParams->getProbability(), probability);
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
} else { } else {
// Make the node terminal and write the probability. // Make the node terminal and write the probability.
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
const int movedPos = mBuffer->getTailPosition(); const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos; int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePoints(), originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability))); probability));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false; return false;
} }
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@ -165,9 +159,10 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
1 /* arraySize */, &writingPos)) { 1 /* arraySize */, &writingPos)) {
return false; return false;
} }
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false; return false;
} }
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@ -194,13 +189,22 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
int writingPos = firstPartOfReallocatedPtNodePos; int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later // Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position. // with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), newProbability)); reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
return false; return false;
} }
} else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
&writingPos)) {
return false;
}
}
const int actualChildrenPos = writingPos; const int actualChildrenPos = writingPos;
// Create new children PtNode array. // Create new children PtNode array.
const size_t newPtNodeCount = addsExtraChild ? 2 : 1; const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
@ -211,7 +215,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node. // Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos; const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
firstPartOfReallocatedPtNodePos, reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
reallocatingPtNodeParams->getProbability())); reallocatingPtNodeParams->getProbability()));
@ -219,10 +223,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return false; return false;
} }
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */,
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) { if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
&writingPos)) {
return false; return false;
} }
} }
@ -242,22 +247,11 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
} }
int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) const {
if (mNeedsToDecay) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
} else {
return newProbability;
}
}
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const int parentPos, const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const { const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal,
probability != NOT_A_PROBABILITY /* isTerminal */,
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(), originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
@ -265,11 +259,10 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
} }
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode( const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
const int parentPos, const int codePointCount, const int *const codePoints, const bool isTerminal, const int parentPos, const int codePointCount,
const int probability) const { const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
false /* isBlacklisted */, false /* isNotAWord */, false /* isBlacklisted */, false /* isNotAWord */, isTerminal,
probability != NOT_A_PROBABILITY /* isTerminal */,
false /* hasShortcutTargets */, false /* hasBigrams */, false /* hasShortcutTargets */, false /* hasBigrams */,
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);

View file

@ -34,10 +34,8 @@ class PtNodeWriter;
class DynamicPatriciaTrieUpdatingHelper { class DynamicPatriciaTrieUpdatingHelper {
public: public:
DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter)
const bool needsToDecay) : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter) {}
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
mNeedsToDecay(needsToDecay) {}
~DynamicPatriciaTrieUpdatingHelper() {} ~DynamicPatriciaTrieUpdatingHelper() {}
@ -61,7 +59,6 @@ class DynamicPatriciaTrieUpdatingHelper {
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
const PtNodeReader *const mPtNodeReader; const PtNodeReader *const mPtNodeReader;
PtNodeWriter *const mPtNodeWriter; PtNodeWriter *const mPtNodeWriter;
const bool mNeedsToDecay;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
@ -80,14 +77,12 @@ class DynamicPatriciaTrieUpdatingHelper {
const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount); const int newNodeCodePointCount);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const int parentPos, const int codePointCount, const int *const codePoints, const bool isTerminal, const int parentPos, const int codePointCount,
const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const; const int *const codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */

View file

@ -76,11 +76,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int *const outUnigramCount, int *const outBigramCount) { int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy,
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay); headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false; return false;
@ -92,7 +94,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(mPtNodeWriter); traversePolicyToUpdateBigramProbability(&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) { &traversePolicyToUpdateBigramProbability)) {
return false; return false;
@ -106,7 +108,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy, DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy,
mShortcutPolicy); mShortcutPolicy, false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite, traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
&dictPositionRelocationMap); &dictPositionRelocationMap);
@ -124,7 +126,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&newDictShortcutPolicy); &newDictShortcutPolicy);
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader, DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader,
&newDictBigramPolicy, &newDictShortcutPolicy); &newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap); traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap);

View file

@ -29,8 +29,6 @@ class DynamicBigramListPolicy;
class DynamicPatriciaTrieReadingHelper; class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy; class DynamicShortcutListPolicy;
class HeaderPolicy; class HeaderPolicy;
class PtNodeReader;
class PtNodeWriter;
// TODO: Make it independent from a particular format and move to pt_common. // TODO: Make it independent from a particular format and move to pt_common.
class DynamicPatriciaTrieWritingHelper { class DynamicPatriciaTrieWritingHelper {
@ -38,11 +36,9 @@ class DynamicPatriciaTrieWritingHelper {
static const size_t MAX_DICTIONARY_SIZE; static const size_t MAX_DICTIONARY_SIZE;
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
DynamicBigramListPolicy *const bigramPolicy, DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecay(needsToDecay) {} mNeedsToDecay(needsToDecay) {}
~DynamicPatriciaTrieWritingHelper() {} ~DynamicPatriciaTrieWritingHelper() {}
@ -57,8 +53,6 @@ class DynamicPatriciaTrieWritingHelper {
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
const PtNodeReader *const mPtNodeReader;
PtNodeWriter *const mPtNodeWriter;
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy; DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecay; const bool mNeedsToDecay;

View file

@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -115,8 +116,10 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
if (!toBeUpdatedPtNodeParams->isTerminal()) { if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false; return false;
} }
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
newProbability);
return mBuffers->getUpdatableProbabilityDictContent()->setProbability( return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
toBeUpdatedPtNodeParams->getTerminalId(), newProbability); toBeUpdatedPtNodeParams->getTerminalId(), probabilityToWrite);
} }
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition( bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
@ -134,67 +137,23 @@ bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBe
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos; return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the ptNodeWritingPos);
// PtNode writing. }
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false; bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
} const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
terminalId = ptNodeParams->getTerminalId(); ptNodeWritingPos)) {
} else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
// Write terminal information using a new terminal id.
// Get a new unused terminal id.
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
}
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}
// Write terminal Id.
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
return false; return false;
} }
// Write probability. // Write probability.
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability( const int probabilityToWrite = getUpdatedProbability(NOT_A_PROBABILITY,
terminalId, ptNodeParams->getProbability())) { ptNodeParams->getProbability());
return false; return mBuffers->getUpdatableProbabilityDictContent()->setProbability(terminalId,
} probabilityToWrite);
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
} }
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
@ -258,4 +217,85 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
return true; return true;
} }
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
terminalId = ptNodeParams->getTerminalId();
} else if (ptNodeParams->isTerminal()) {
// Write terminal information using a new terminal id.
// Get a new unused terminal id.
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
}
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}
// Write terminal Id.
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
return false;
}
// Write probability.
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
terminalId, ptNodeParams->getProbability())) {
return false;
}
}
if (outTerminalId) {
*outTerminalId = terminalId;
}
}
// Write children position
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}
// Chooses the probability value to store for an update.
//
// When this writer was constructed with needsToDecayWhenUpdating == false, the requested
// newProbability is returned unchanged. Otherwise the value is computed by
// ForgettingCurveUtils::getUpdatedEncodedProbability() from the original and new
// probabilities.
int Ver4PatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
        const int newProbability) const {
    if (!mNeedsToDecayWhenUpdating) {
        return newProbability;
    }
    return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
            newProbability);
}
} }

View file

@ -39,10 +39,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public: public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy,
const bool needsToDecayWhenUpdating)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader), mReadingHelper(mTrieBuffer, mPtNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
virtual ~Ver4PatriciaTrieNodeWriter() {} virtual ~Ver4PatriciaTrieNodeWriter() {}
@ -63,6 +65,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos); int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability, const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram); bool *const outAddedNewBigram);
@ -80,6 +85,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
private: private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
bool writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos);
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
static const int CHILDREN_POSITION_FIELD_SIZE; static const int CHILDREN_POSITION_FIELD_SIZE;
BufferWithExtendableBuffer *const mTrieBuffer; BufferWithExtendableBuffer *const mTrieBuffer;
@ -88,6 +99,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
DynamicPatriciaTrieReadingHelper mReadingHelper; DynamicPatriciaTrieReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy; Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy; Ver4ShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecayWhenUpdating;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */ #endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -49,9 +49,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy), &mShortcutPolicy, mHeaderPolicy.isDecayingDict()),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter, mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mHeaderPolicy.isDecayingDict()),
mWritingHelper(mBuffers.get()), mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy.getUnigramCount()), mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}; mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {};

View file

@ -87,7 +87,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(), Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()); mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy); mBuffers, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
@ -121,7 +122,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
@ -139,7 +141,8 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(), Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable()); buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy,
false /* needsToDecayWhenUpdating */);
DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
&newPtNodeReader); &newPtNodeReader);