Refactoring: Use UnigramProperty to add/update unigram.
Bug: 13406708 Change-Id: I26fd541fb465d3543faa5f155becc455ddbb6c9c
This commit is contained in:
parent
eaa347bc1a
commit
b636e25e95
6 changed files with 81 additions and 80 deletions
native/jni/src/suggest/policyimpl/dictionary/structure
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||||
|
@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addUnigramWord(
|
bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
DynamicPtReadingHelper *const readingHelper,
|
DynamicPtReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
const int *const wordCodePoints, const int codePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
|
||||||
bool *const outAddedNewUnigram) {
|
|
||||||
int parentPos = NOT_A_DICT_POS;
|
int parentPos = NOT_A_DICT_POS;
|
||||||
while (!readingHelper->isEnd()) {
|
while (!readingHelper->isEnd()) {
|
||||||
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||||
|
@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
|
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
|
||||||
probability, timestamp, wordCodePoints + matchedCodePointCount,
|
wordCodePoints + matchedCodePointCount,
|
||||||
codePointCount - matchedCodePointCount);
|
codePointCount - matchedCodePointCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||||
return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
|
return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
|
||||||
timestamp, outAddedNewUnigram);
|
|
||||||
}
|
}
|
||||||
if (!ptNodeParams.hasChildren()) {
|
if (!ptNodeParams.hasChildren()) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
|
||||||
isNotAWord, isBlacklisted, probability, timestamp,
|
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||||
}
|
}
|
||||||
|
@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
isNotAWord, isBlacklisted, probability, timestamp, &pos);
|
unigramProperty, &pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
const int *const nodeCodePoints, const int nodeCodePointCount,
|
const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
|
||||||
const int timestamp, int *const forwardLinkFieldPos) {
|
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
||||||
isNotAWord, isBlacklisted, probability, timestamp);
|
unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::setPtNodeProbability(
|
bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
||||||
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
|
||||||
const bool isBlacklisted, const int probability, const int timestamp,
|
|
||||||
bool *const outAddedNewUnigram) {
|
|
||||||
if (originalPtNodeParams->isTerminal()) {
|
if (originalPtNodeParams->isTerminal()) {
|
||||||
// Overwrites the probability.
|
// Overwrites the probability.
|
||||||
*outAddedNewUnigram = false;
|
*outAddedNewUnigram = false;
|
||||||
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
|
return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
|
||||||
} else {
|
} else {
|
||||||
// Make the node terminal and write the probability.
|
// Make the node terminal and write the probability.
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
const int movedPos = mBuffer->getTailPosition();
|
const int movedPos = mBuffer->getTailPosition();
|
||||||
int writingPos = movedPos;
|
int writingPos = movedPos;
|
||||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
||||||
originalPtNodeParams->getCodePoints(), probability));
|
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
|
||||||
|
unigramProperty->getProbability()));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
timestamp, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
||||||
|
@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
|
const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
|
||||||
const bool isBlacklisted, const int probability, const int timestamp,
|
|
||||||
const int *const codePoints, const int codePointCount) {
|
const int *const codePoints, const int codePointCount) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
|
codePointCount, unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
const UnigramProperty *const unigramProperty) {
|
||||||
const int timestamp) {
|
|
||||||
int writingPos = mBuffer->getTailPosition();
|
int writingPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
1 /* arraySize */, &writingPos)) {
|
1 /* arraySize */, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
|
||||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
parentPtNodePos, nodeCodePointCount, nodeCodePoints,
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
|
unigramProperty->getProbability()));
|
||||||
&writingPos)) {
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
// Returns whether the dictionary update succeeded.
|
// Returns whether the dictionary update succeeded.
|
||||||
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
|
||||||
const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
|
const int newNodeCodePointCount) {
|
||||||
// When addsExtraChild is true, split the reallocating PtNode and add a new child.
|
// When addsExtraChild is true, split the reallocating PtNode and add a new child.
|
||||||
// Reallocating PtNode: abcde, newNode: abcxy.
|
// Reallocating PtNode: abcde, newNode: abcxy.
|
||||||
// abc (1st, not terminal) __ de (2nd)
|
// abc (1st, not terminal) __ de (2nd)
|
||||||
// \_ xy (extra child, terminal)
|
// \_ xy (extra child, terminal)
|
||||||
// Otherwise, this method makes the 1st part terminal and writes probabilityOfNewPtNode.
|
// Otherwise, this method makes the 1st part terminal and writes the information in unigramProperty.
|
||||||
// Reallocating PtNode: abcde, newNode: abc.
|
// Reallocating PtNode: abcde, newNode: abc.
|
||||||
// abc (1st, terminal) __ de (2nd)
|
// abc (1st, terminal) __ de (2nd)
|
||||||
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
||||||
|
@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
|
||||||
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
|
overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
|
||||||
|
unigramProperty->getProbability()));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
timestamp, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||||
isNotAWord, isBlacklisted, true /* isTerminal */,
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
true /* isTerminal */, firstPartOfReallocatedPtNodePos,
|
||||||
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
newNodeCodePointCount - overlappingCodePointCount,
|
||||||
|
newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
||||||
timestamp, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
|
|
||||||
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
|
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
|
||||||
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
const PtNodeParams *const originalPtNodeParams,
|
||||||
const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability) const {
|
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
|
isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
|
||||||
|
|
|
@ -26,6 +26,7 @@ class BufferWithExtendableBuffer;
|
||||||
class DynamicPtReadingHelper;
|
class DynamicPtReadingHelper;
|
||||||
class PtNodeReader;
|
class PtNodeReader;
|
||||||
class PtNodeWriter;
|
class PtNodeWriter;
|
||||||
|
class UnigramProperty;
|
||||||
|
|
||||||
class DynamicPtUpdatingHelper {
|
class DynamicPtUpdatingHelper {
|
||||||
public:
|
public:
|
||||||
|
@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper {
|
||||||
|
|
||||||
// Add a word to the dictionary. If the word already exists, update the probability.
|
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||||
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
|
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
const int *const wordCodePoints, const int codePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int timestamp,
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
||||||
bool *const outAddedNewUnigram);
|
|
||||||
|
|
||||||
// Add a bigram relation from word0Pos to word1Pos.
|
// Add a bigram relation from word0Pos to word1Pos.
|
||||||
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
||||||
|
@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper {
|
||||||
PtNodeWriter *const mPtNodeWriter;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
|
||||||
const int probability, const int timestamp, int *const forwardLinkFieldPos);
|
int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
||||||
const bool isBlacklisted, const int probability, const int timestamp,
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
||||||
bool *const outAddedNewUnigram);
|
|
||||||
|
|
||||||
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int probability,
|
const UnigramProperty *const unigramProperty, const int *const codePoints,
|
||||||
const int timestamp, const int *const codePoints, const int codePointCount);
|
const int codePointCount);
|
||||||
|
|
||||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
|
const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
|
||||||
const int probability, const int timestamp);
|
|
||||||
|
|
||||||
bool reallocatePtNodeAndAddNewPtNodes(
|
bool reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
|
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
|
||||||
const int timestamp, const int *const newNodeCodePoints,
|
|
||||||
const int newNodeCodePointCount);
|
const int newNodeCodePointCount);
|
||||||
|
|
||||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||||
|
|
|
@ -24,6 +24,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class UnigramProperty;
|
||||||
|
|
||||||
// Interface class used to write PtNode information.
|
// Interface class used to write PtNode information.
|
||||||
class PtNodeWriter {
|
class PtNodeWriter {
|
||||||
public:
|
public:
|
||||||
|
@ -51,8 +53,8 @@ class PtNodeWriter {
|
||||||
virtual bool markPtNodeAsWillBecomeNonTerminal(
|
virtual bool markPtNodeAsWillBecomeNonTerminal(
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
|
const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
|
||||||
|
|
||||||
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
const int probability, const int timestamp) = 0;
|
const UnigramProperty *const unigramProperty) = 0;
|
||||||
|
|
||||||
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
@ -65,7 +67,7 @@ class PtNodeWriter {
|
||||||
int *const ptNodeWritingPos) = 0;
|
int *const ptNodeWritingPos) = 0;
|
||||||
|
|
||||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||||
const int timestamp, int *const ptNodeWritingPos) = 0;
|
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;
|
||||||
|
|
||||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||||
|
|
||||||
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
||||||
|
@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
|
||||||
&writingPos);
|
&writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
|
bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
|
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
const int timestamp) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
|
// Update probability and historical information.
|
||||||
|
// TODO: Update other information in the unigram property.
|
||||||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
|
||||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||||
toBeUpdatedPtNodeParams->getTerminalId());
|
toBeUpdatedPtNodeParams->getTerminalId());
|
||||||
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
|
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
|
||||||
newProbability, timestamp);
|
unigramProperty);
|
||||||
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
|
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
|
||||||
}
|
}
|
||||||
|
@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||||
|
|
||||||
|
|
||||||
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||||
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
|
const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
|
||||||
|
int *const ptNodeWritingPos) {
|
||||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
|
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
|
||||||
ptNodeWritingPos)) {
|
ptNodeWritingPos)) {
|
||||||
|
@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||||
// Write probability.
|
// Write probability.
|
||||||
ProbabilityEntry newProbabilityEntry;
|
ProbabilityEntry newProbabilityEntry;
|
||||||
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
|
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
|
||||||
&newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
|
&newProbabilityEntry, unigramProperty);
|
||||||
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
|
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
|
||||||
&probabilityEntryToWrite);
|
&probabilityEntryToWrite);
|
||||||
}
|
}
|
||||||
|
@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
|
||||||
}
|
}
|
||||||
|
|
||||||
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
||||||
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
const ProbabilityEntry *const originalProbabilityEntry,
|
||||||
const int timestamp) const {
|
const UnigramProperty *const unigramProperty) const {
|
||||||
// TODO: Consolidate historical info and probability.
|
// TODO: Consolidate historical info and probability.
|
||||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
|
originalProbabilityEntry->getHistoricalInfo(),
|
||||||
|
unigramProperty->getProbability(), unigramProperty->getTimestamp(),
|
||||||
mHeaderPolicy);
|
mHeaderPolicy);
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
|
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
|
||||||
&updatedHistoricalInfo);
|
&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
|
return originalProbabilityEntry->createEntryWithUpdatedProbability(
|
||||||
|
unigramProperty->getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
virtual bool markPtNodeAsWillBecomeNonTerminal(
|
virtual bool markPtNodeAsWillBecomeNonTerminal(
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams);
|
const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||||
|
|
||||||
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
const int newProbability, const int timestamp);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
|
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
|
||||||
|
@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
int *const ptNodeWritingPos);
|
int *const ptNodeWritingPos);
|
||||||
|
|
||||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||||
const int timestamp, int *const ptNodeWritingPos);
|
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
||||||
|
|
||||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
||||||
|
@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
||||||
int *const ptNodeWritingPos);
|
int *const ptNodeWritingPos);
|
||||||
|
|
||||||
// Create updated probability entry using given probability and timestamp. In addition to the
|
// Create updated probability entry using given unigram property. In addition to the
|
||||||
// probability, this method updates historical information if needed.
|
// probability, this method updates historical information if needed.
|
||||||
|
// TODO: Update flags belonging to the unigram property.
|
||||||
const ProbabilityEntry createUpdatedEntryFrom(
|
const ProbabilityEntry createUpdatedEntryFrom(
|
||||||
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
const ProbabilityEntry *const originalProbabilityEntry,
|
||||||
const int timestamp) const;
|
const UnigramProperty *const unigramProperty) const;
|
||||||
|
|
||||||
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
|
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
|
||||||
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
|
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
|
||||||
|
|
|
@ -179,9 +179,7 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
|
||||||
unigramProperty->getProbability(), unigramProperty->isNotAWord(),
|
unigramProperty, &addedNewUnigram)) {
|
||||||
unigramProperty->isBlacklisted(), unigramProperty->getTimestamp(),
|
|
||||||
&addedNewUnigram)) {
|
|
||||||
if (addedNewUnigram) {
|
if (addedNewUnigram) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue