Refactoring: Use UnigramProperty to add/update unigram.

Bug: 13406708
Change-Id: I26fd541fb465d3543faa5f155becc455ddbb6c9c
This commit is contained in:
Keisuke Kuroyanagi 2014-05-09 17:22:17 +09:00
parent eaa347bc1a
commit b636e25e95
6 changed files with 81 additions and 80 deletions

View file

@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPtUpdatingHelper::addUnigramWord( bool DynamicPtUpdatingHelper::addUnigramWord(
DynamicPtReadingHelper *const readingHelper, DynamicPtReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability, const int *const wordCodePoints, const int codePointCount,
const bool isNotAWord, const bool isBlacklisted, const int timestamp, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS; int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) { while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
probability, timestamp, wordCodePoints + matchedCodePointCount, wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount); codePointCount - matchedCodePointCount);
} }
} }
// All characters are matched. // All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability, return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
timestamp, outAddedNewUnigram);
} }
if (!ptNodeParams.hasChildren()) { if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
isNotAWord, isBlacklisted, probability, timestamp,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
} }
@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
return createAndInsertNodeIntoPtNodeArray(parentPos, return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(), wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(),
isNotAWord, isBlacklisted, probability, timestamp, &pos); unigramProperty, &pos);
} }
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int *const nodeCodePoints, const int nodeCodePointCount,
const bool isNotAWord, const bool isBlacklisted, const int probability, const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
const int timestamp, int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) { newPtNodeArrayPos, forwardLinkFieldPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
isNotAWord, isBlacklisted, probability, timestamp); unigramProperty);
} }
bool DynamicPtUpdatingHelper::setPtNodeProbability( bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
const bool isBlacklisted, const int probability, const int timestamp,
bool *const outAddedNewUnigram) {
if (originalPtNodeParams->isTerminal()) { if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability. // Overwrites the probability.
*outAddedNewUnigram = false; *outAddedNewUnigram = false;
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp); return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
} else { } else {
// Make the node terminal and write the probability. // Make the node terminal and write the probability.
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
const int movedPos = mBuffer->getTailPosition(); const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos; int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
isNotAWord, isBlacklisted, true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePoints(), probability)); originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
timestamp, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
} }
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(
} }
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
const bool isBlacklisted, const int probability, const int timestamp,
const int *const codePoints, const int codePointCount) { const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, isNotAWord, isBlacklisted, probability, timestamp); codePointCount, unigramProperty);
} }
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
const bool isNotAWord, const bool isBlacklisted, const int probability, const UnigramProperty *const unigramProperty) {
const int timestamp) {
int writingPos = mBuffer->getTailPosition(); int writingPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) { 1 /* arraySize */, &writingPos)) {
return false; return false;
} }
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); parentPtNodePos, nodeCodePointCount, nodeCodePoints,
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, unigramProperty->getProbability()));
&writingPos)) { if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) {
return false; return false;
} }
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary update succeeded. // Returns whether the dictionary update succeeded.
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) { const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child. // When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy. // Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd) // abc (1st, not terminal) __ de (2nd)
// \_ xy (extra child, terminal) // \_ xy (extra child, terminal)
// Otherwise, this method makes the 1st part terminal and writes probabilityOfNewPtNode. // Otherwise, this method makes the 1st part terminal and writes the information in unigramProperty.
// Reallocating PtNode: abcde, newNode: abc. // Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd) // abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
} else { } else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
timestamp, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
} }
} }
@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, true /* isTerminal */, firstPartOfReallocatedPtNodePos,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
timestamp, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
} }
} }
@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const PtNodeParams *const originalPtNodeParams,
const bool isBlacklisted, const bool isTerminal, const int parentPos, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const { const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),

View file

@ -26,6 +26,7 @@ class BufferWithExtendableBuffer;
class DynamicPtReadingHelper; class DynamicPtReadingHelper;
class PtNodeReader; class PtNodeReader;
class PtNodeWriter; class PtNodeWriter;
class UnigramProperty;
class DynamicPtUpdatingHelper { class DynamicPtUpdatingHelper {
public: public:
@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability. // Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability, const int *const wordCodePoints, const int codePointCount,
const bool isNotAWord, const bool isBlacklisted, const int timestamp, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos. // Add a bigram relation from word0Pos to word1Pos.
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper {
PtNodeWriter *const mPtNodeWriter; PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
const int probability, const int timestamp, int *const forwardLinkFieldPos); int *const forwardLinkFieldPos);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
const bool isBlacklisted, const int probability, const int timestamp, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const bool isNotAWord, const bool isBlacklisted, const int probability, const UnigramProperty *const unigramProperty, const int *const codePoints,
const int timestamp, const int *const codePoints, const int codePointCount); const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
const int probability, const int timestamp);
bool reallocatePtNodeAndAddNewPtNodes( bool reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
const int timestamp, const int *const newNodeCodePoints,
const int newNodeCodePointCount); const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,

View file

@ -24,6 +24,8 @@
namespace latinime { namespace latinime {
class UnigramProperty;
// Interface class used to write PtNode information. // Interface class used to write PtNode information.
class PtNodeWriter { class PtNodeWriter {
public: public:
@ -51,8 +53,8 @@ class PtNodeWriter {
virtual bool markPtNodeAsWillBecomeNonTerminal( virtual bool markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int probability, const int timestamp) = 0; const UnigramProperty *const unigramProperty) = 0;
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams, const PtNodeParams *const toBeUpdatedPtNodeParams,
@ -65,7 +67,7 @@ class PtNodeWriter {
int *const ptNodeWritingPos) = 0; int *const ptNodeWritingPos) = 0;
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const int timestamp, int *const ptNodeWritingPos) = 0; const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,

View file

@ -16,6 +16,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
&writingPos); &writingPos);
} }
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, const PtNodeParams *const toBeUpdatedPtNodeParams,
const int timestamp) { const UnigramProperty *const unigramProperty) {
// Update probability and historical information.
// TODO: Update other information in the unigram property.
if (!toBeUpdatedPtNodeParams->isTerminal()) { if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false; return false;
} }
@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
mBuffers->getProbabilityDictContent()->getProbabilityEntry( mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId()); toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
newProbability, timestamp); unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
} }
@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) { const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
int *const ptNodeWritingPos) {
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
ptNodeWritingPos)) { ptNodeWritingPos)) {
@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
// Write probability. // Write probability.
ProbabilityEntry newProbabilityEntry; ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, ptNodeParams->getProbability(), timestamp); &newProbabilityEntry, unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
&probabilityEntryToWrite); &probabilityEntryToWrite);
} }
@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
} }
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const ProbabilityEntry *const originalProbabilityEntry,
const int timestamp) const { const UnigramProperty *const unigramProperty) const {
// TODO: Consolidate historical info and probability. // TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) { if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo updatedHistoricalInfo = const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo( ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp, originalProbabilityEntry->getHistoricalInfo(),
unigramProperty->getProbability(), unigramProperty->getTimestamp(),
mHeaderPolicy); mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo); &updatedHistoricalInfo);
} else { } else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); return originalProbabilityEntry->createEntryWithUpdatedProbability(
unigramProperty->getProbability());
} }
} }

View file

@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
virtual bool markPtNodeAsWillBecomeNonTerminal( virtual bool markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams); const PtNodeParams *const toBeUpdatedPtNodeParams);
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newProbability, const int timestamp); const UnigramProperty *const unigramProperty);
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
int *const ptNodeWritingPos); int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const int timestamp, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
const PtNodeParams *const ptNodeParams, int *const outTerminalId, const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos); int *const ptNodeWritingPos);
// Create updated probability entry using given probability and timestamp. In addition to the // Create updated probability entry using given unigram property. In addition to the
// probability, this method updates historical information if needed. // probability, this method updates historical information if needed.
// TODO: Update flags belonging to the unigram property.
const ProbabilityEntry createUpdatedEntryFrom( const ProbabilityEntry createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const ProbabilityEntry *const originalProbabilityEntry,
const int timestamp) const; const UnigramProperty *const unigramProperty) const;
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,

View file

@ -179,9 +179,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false; bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
unigramProperty->getProbability(), unigramProperty->isNotAWord(), unigramProperty, &addedNewUnigram)) {
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
&addedNewUnigram)) {
if (addedNewUnigram) { if (addedNewUnigram) {
mUnigramCount++; mUnigramCount++;
} }