am 680fb1d3: Merge "Refactoring: Separate updating methods from writing methods."
* commit '680fb1d3a0b3e0cf28b2367457f12586196970d6': Refactoring: Separate updating methods from writing methods.main
commit
31946a8cf9
|
@ -84,6 +84,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
|
dynamic_patricia_trie_updating_helper.cpp \
|
||||||
dynamic_patricia_trie_writing_helper.cpp \
|
dynamic_patricia_trie_writing_helper.cpp \
|
||||||
dynamic_patricia_trie_writing_utils.cpp) \
|
dynamic_patricia_trie_writing_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
|
|
|
@ -46,6 +46,9 @@ class PtNodeWriter {
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
bool *const outAddedNewBigram) = 0;
|
bool *const outAddedNewBigram) = 0;
|
||||||
|
|
||||||
|
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
PtNodeWriter() {};
|
PtNodeWriter() {};
|
||||||
|
|
||||||
|
|
|
@ -224,4 +224,13 @@ bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::removeBigramEntry(
|
||||||
|
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||||
|
if (sourcePtNodeParams->getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return mBigramPolicy->removeBigram(sourcePtNodeParams->getBigramsPos(),
|
||||||
|
targetPtNodeParam->getHeadPos());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,6 +63,9 @@ class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability,
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
bool *const outAddedNewBigram);
|
bool *const outAddedNewBigram);
|
||||||
|
|
||||||
|
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
@ -152,10 +153,8 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
|
||||||
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
||||||
&addedNewUnigram)) {
|
&addedNewUnigram)) {
|
||||||
if (addedNewUnigram) {
|
if (addedNewUnigram) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
|
@ -187,10 +186,8 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
|
||||||
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
|
||||||
bool addedNewBigram = false;
|
bool addedNewBigram = false;
|
||||||
if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
||||||
if (addedNewBigram) {
|
if (addedNewBigram) {
|
||||||
mBigramCount++;
|
mBigramCount++;
|
||||||
}
|
}
|
||||||
|
@ -221,9 +218,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||||
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
|
||||||
if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
|
||||||
mBigramCount--;
|
mBigramCount--;
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
@ -49,6 +50,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
||||||
&mShortcutListPolicy),
|
&mShortcutListPolicy),
|
||||||
|
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter,
|
||||||
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||||
|
|
||||||
|
@ -121,6 +124,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
DynamicPatriciaTrieNodeWriter mNodeWriter;
|
DynamicPatriciaTrieNodeWriter mNodeWriter;
|
||||||
|
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
int mNeedsToDecayForTesting;
|
int mNeedsToDecayForTesting;
|
||||||
|
|
|
@ -0,0 +1,279 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
|
||||||
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
|
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||||
|
bool *const outAddedNewUnigram) {
|
||||||
|
int parentPos = NOT_A_DICT_POS;
|
||||||
|
while (!readingHelper->isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
||||||
|
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
||||||
|
wordCodePoints[matchedCodePointCount])) {
|
||||||
|
// The first code point is different from target code point. Skip this node and read
|
||||||
|
// the next sibling node.
|
||||||
|
readingHelper->readNextSiblingNode(ptNodeParams);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Check following merged node code points.
|
||||||
|
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
||||||
|
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||||
|
const int nextIndex = matchedCodePointCount + j;
|
||||||
|
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||||
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
|
*outAddedNewUnigram = true;
|
||||||
|
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
||||||
|
probability),
|
||||||
|
wordCodePoints + matchedCodePointCount,
|
||||||
|
codePointCount - matchedCodePointCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// All characters are matched.
|
||||||
|
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||||
|
return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
|
||||||
|
}
|
||||||
|
if (!ptNodeParams.hasChildren()) {
|
||||||
|
*outAddedNewUnigram = true;
|
||||||
|
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||||
|
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||||
|
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||||
|
}
|
||||||
|
// Advance to the children nodes.
|
||||||
|
parentPos = ptNodeParams.getHeadPos();
|
||||||
|
readingHelper->readChildNode(ptNodeParams);
|
||||||
|
}
|
||||||
|
if (readingHelper->isError()) {
|
||||||
|
// The dictionary is invalid.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int pos = readingHelper->getPosOfLastForwardLinkField();
|
||||||
|
*outAddedNewUnigram = true;
|
||||||
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
const int probability, bool *const outAddedNewBigram) {
|
||||||
|
const PtNodeParams sourcePtNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
|
const PtNodeParams targetPtNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
||||||
|
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
|
||||||
|
outAddedNewBigram);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
|
const PtNodeParams sourcePtNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
|
const PtNodeParams targetPtNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
||||||
|
return mPtNodeWriter->removeBigramEntry(&sourcePtNodeParams, &targetPtNodeParams);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
|
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
||||||
|
int *const forwardLinkFieldPos) {
|
||||||
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
||||||
|
probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
|
||||||
|
const PtNodeParams *const originalPtNodeParams, const int probability,
|
||||||
|
bool *const outAddedNewUnigram) {
|
||||||
|
if (originalPtNodeParams->isTerminal()) {
|
||||||
|
// Overwrites the probability.
|
||||||
|
*outAddedNewUnigram = false;
|
||||||
|
const int probabilityToWrite = getUpdatedProbability(
|
||||||
|
originalPtNodeParams->getProbability(), probability);
|
||||||
|
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
|
||||||
|
} else {
|
||||||
|
// Make the node terminal and write the probability.
|
||||||
|
*outAddedNewUnigram = true;
|
||||||
|
const int movedPos = mBuffer->getTailPosition();
|
||||||
|
int writingPos = movedPos;
|
||||||
|
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||||
|
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
||||||
|
originalPtNodeParams->getCodePoints(),
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
|
const PtNodeParams *const parentPtNodeParams, const int probability,
|
||||||
|
const int *const codePoints, const int codePointCount) {
|
||||||
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
|
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
|
codePointCount, probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
|
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
||||||
|
const int probability) {
|
||||||
|
int writingPos = mBuffer->getTailPosition();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
|
1 /* arraySize */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
|
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns whether the dictionary updating was succeeded or not.
|
||||||
|
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
|
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||||
|
const int newNodeCodePointCount) {
|
||||||
|
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
||||||
|
// Reallocating PtNode: abcde, newNode: abcxy.
|
||||||
|
// abc (1st, not terminal) __ de (2nd)
|
||||||
|
// \_ xy (extra child, terminal)
|
||||||
|
// Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
|
||||||
|
// Reallocating PtNode: abcde, newNode: abc.
|
||||||
|
// abc (1st, terminal) __ de (2nd)
|
||||||
|
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
||||||
|
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
|
||||||
|
int writingPos = firstPartOfReallocatedPtNodePos;
|
||||||
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
|
// with actual children position.
|
||||||
|
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
||||||
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
|
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||||
|
reallocatingPtNodeParams->getCodePoints(), newProbability));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int actualChildrenPos = writingPos;
|
||||||
|
// Create new children PtNode array.
|
||||||
|
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
|
newPtNodeCount, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write the 2nd part of the reallocating node.
|
||||||
|
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||||
|
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||||
|
firstPartOfReallocatedPtNodePos,
|
||||||
|
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||||
|
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||||
|
reallocatingPtNodeParams->getProbability()));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (addsExtraChild) {
|
||||||
|
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||||
|
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
||||||
|
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update original reallocating PtNode as moved.
|
||||||
|
if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
|
||||||
|
secondPartOfReallocatedPtNodePos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Load node info. Information of the 1st part will be fetched.
|
||||||
|
const PtNodeParams ptNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
|
||||||
|
// Update children position.
|
||||||
|
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
int DynamicPatriciaTrieUpdatingHelper::getUpdatedProbability(const int originalProbability,
|
||||||
|
const int newProbability) const {
|
||||||
|
if (mNeedsToDecay) {
|
||||||
|
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||||
|
newProbability);
|
||||||
|
} else {
|
||||||
|
return newProbability;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
|
||||||
|
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
||||||
|
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
|
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
|
||||||
|
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||||
|
originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */,
|
||||||
|
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
||||||
|
probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probability) const {
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
|
false /* isBlacklisted */, false /* isNotAWord */,
|
||||||
|
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
false /* hasShortcutTargets */, false /* hasBigrams */,
|
||||||
|
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,93 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H
|
||||||
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class DynamicPatriciaTrieReadingHelper;
|
||||||
|
class PtNodeReader;
|
||||||
|
class PtNodeWriter;
|
||||||
|
|
||||||
|
// TODO: Move to pt_common.
|
||||||
|
class DynamicPatriciaTrieUpdatingHelper {
|
||||||
|
public:
|
||||||
|
DynamicPatriciaTrieUpdatingHelper(BufferWithExtendableBuffer *const buffer,
|
||||||
|
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
|
||||||
|
const bool needsToDecay)
|
||||||
|
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
|
||||||
|
mNeedsToDecay(needsToDecay) {}
|
||||||
|
|
||||||
|
~DynamicPatriciaTrieUpdatingHelper() {}
|
||||||
|
|
||||||
|
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||||
|
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
|
const int *const wordCodePoints, const int codePointCount, const int probability,
|
||||||
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
|
// Add a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
||||||
|
bool *const outAddedNewBigram);
|
||||||
|
|
||||||
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
|
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieUpdatingHelper);
|
||||||
|
|
||||||
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
const PtNodeReader *const mPtNodeReader;
|
||||||
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
|
const bool mNeedsToDecay;
|
||||||
|
|
||||||
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
|
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
|
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
|
||||||
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
|
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||||
|
const int probability, const int *const codePoints, const int codePointCount);
|
||||||
|
|
||||||
|
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
||||||
|
const int nodeCodePointCount, const int probability);
|
||||||
|
|
||||||
|
bool reallocatePtNodeAndAddNewPtNodes(
|
||||||
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
|
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||||
|
const int newNodeCodePointCount);
|
||||||
|
|
||||||
|
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||||
|
|
||||||
|
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probability) const;
|
||||||
|
|
||||||
|
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
|
||||||
|
const int *const codePoints, const int probability) const;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
|
|
@ -34,88 +34,9 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
|
||||||
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
||||||
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
|
||||||
bool *const outAddedNewUnigram) {
|
|
||||||
int parentPos = NOT_A_DICT_POS;
|
|
||||||
while (!readingHelper->isEnd()) {
|
|
||||||
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
|
||||||
if (!ptNodeParams.isValid()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
|
||||||
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
|
||||||
wordCodePoints[matchedCodePointCount])) {
|
|
||||||
// The first code point is different from target code point. Skip this node and read
|
|
||||||
// the next sibling node.
|
|
||||||
readingHelper->readNextSiblingNode(ptNodeParams);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Check following merged node code points.
|
|
||||||
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
|
||||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
|
||||||
const int nextIndex = matchedCodePointCount + j;
|
|
||||||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
|
||||||
*outAddedNewUnigram = true;
|
|
||||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
|
||||||
probability),
|
|
||||||
wordCodePoints + matchedCodePointCount,
|
|
||||||
codePointCount - matchedCodePointCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// All characters are matched.
|
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
|
||||||
return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
|
|
||||||
}
|
|
||||||
if (!ptNodeParams.hasChildren()) {
|
|
||||||
*outAddedNewUnigram = true;
|
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
|
||||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
|
||||||
}
|
|
||||||
// Advance to the children nodes.
|
|
||||||
parentPos = ptNodeParams.getHeadPos();
|
|
||||||
readingHelper->readChildNode(ptNodeParams);
|
|
||||||
}
|
|
||||||
if (readingHelper->isError()) {
|
|
||||||
// The dictionary is invalid.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int pos = readingHelper->getPosOfLastForwardLinkField();
|
|
||||||
*outAddedNewUnigram = true;
|
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
|
||||||
const int probability, bool *const outAddedNewBigram) {
|
|
||||||
const PtNodeParams sourcePtNodeParams(
|
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
|
||||||
const PtNodeParams targetPtNodeParams(
|
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
|
||||||
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
|
|
||||||
outAddedNewBigram);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
|
||||||
const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
|
||||||
if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
|
@ -149,142 +70,6 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
|
||||||
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
|
||||||
int *const forwardLinkFieldPos) {
|
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
|
||||||
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
|
||||||
probability);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
|
||||||
const PtNodeParams *const originalPtNodeParams, const int probability,
|
|
||||||
bool *const outAddedNewUnigram) {
|
|
||||||
if (originalPtNodeParams->isTerminal()) {
|
|
||||||
// Overwrites the probability.
|
|
||||||
*outAddedNewUnigram = false;
|
|
||||||
const int probabilityToWrite = getUpdatedProbability(
|
|
||||||
originalPtNodeParams->getProbability(), probability);
|
|
||||||
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
|
|
||||||
} else {
|
|
||||||
// Make the node terminal and write the probability.
|
|
||||||
*outAddedNewUnigram = true;
|
|
||||||
const int movedPos = mBuffer->getTailPosition();
|
|
||||||
int writingPos = movedPos;
|
|
||||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
|
||||||
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
|
||||||
originalPtNodeParams->getCodePoints(),
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
|
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
|
||||||
const PtNodeParams *const parentPtNodeParams, const int probability,
|
|
||||||
const int *const codePoints, const int codePointCount) {
|
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
|
||||||
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
|
||||||
codePointCount, probability);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
|
|
||||||
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
|
||||||
const int probability) {
|
|
||||||
int writingPos = mBuffer->getTailPosition();
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
|
||||||
1 /* arraySize */, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
|
||||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
|
||||||
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns whether the dictionary updating was succeeded or not.
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
|
||||||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
|
||||||
const int newNodeCodePointCount) {
|
|
||||||
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
|
||||||
// Reallocating PtNode: abcde, newNode: abcxy.
|
|
||||||
// abc (1st, not terminal) __ de (2nd)
|
|
||||||
// \_ xy (extra child, terminal)
|
|
||||||
// Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
|
|
||||||
// Reallocating PtNode: abcde, newNode: abc.
|
|
||||||
// abc (1st, terminal) __ de (2nd)
|
|
||||||
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
|
||||||
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
|
|
||||||
int writingPos = firstPartOfReallocatedPtNodePos;
|
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
|
||||||
// with actual children position.
|
|
||||||
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
|
||||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
|
||||||
reallocatingPtNodeParams->getCodePoints(), newProbability));
|
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int actualChildrenPos = writingPos;
|
|
||||||
// Create new children PtNode array.
|
|
||||||
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
|
||||||
newPtNodeCount, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write the 2nd part of the reallocating node.
|
|
||||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
|
||||||
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
|
||||||
firstPartOfReallocatedPtNodePos,
|
|
||||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
|
||||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
|
||||||
reallocatingPtNodeParams->getProbability()));
|
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (addsExtraChild) {
|
|
||||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
|
||||||
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
|
||||||
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
|
||||||
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update original reallocating PtNode as moved.
|
|
||||||
if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
|
|
||||||
secondPartOfReallocatedPtNodePos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Load node info. Information of the 1st part will be fetched.
|
|
||||||
const PtNodeParams ptNodeParams(
|
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
|
|
||||||
// Update children position.
|
|
||||||
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Make this method version independent.
|
// TODO: Make this method version independent.
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
|
@ -351,38 +136,4 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
|
||||||
const int newProbability) const {
|
|
||||||
if (mNeedsToDecay) {
|
|
||||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
|
||||||
newProbability);
|
|
||||||
} else {
|
|
||||||
return newProbability;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Builds PtNodeParams based on an existing PtNode with a new parent position, new code points and
// a new probability. The blacklisted / not-a-word attributes and the presence of shortcuts and
// bigrams are taken from the original PtNode; the terminal and multiple-chars flags are derived
// from the new probability and code point count.
const PtNodeParams DynamicPatriciaTrieWritingHelper::getUpdatedPtNodeParams(
        const PtNodeParams *const originalPtNodeParams, const int parentPos,
        const int codePointCount, const int *const codePoints, const int probability) const {
    const bool isTerminal = probability != NOT_A_PROBABILITY;
    const bool hasShortcutTargets = originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS;
    const bool hasBigrams = originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS;
    const bool hasMultipleChars = codePointCount > 1;
    const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
            PatriciaTrieReadingUtils::createAndGetFlags(originalPtNodeParams->isBlacklisted(),
                    originalPtNodeParams->isNotAWord(), isTerminal, hasShortcutTargets,
                    hasBigrams, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
    return PtNodeParams(originalPtNodeParams, updatedFlags, parentPos, codePointCount,
            codePoints, probability);
}
|
|
||||||
|
|
||||||
// Builds PtNodeParams for a brand-new PtNode. The PtNode is neither blacklisted nor not-a-word
// and has no shortcuts or bigrams; it is terminal when a probability is given and has the
// multiple-chars flag when it holds more than one code point.
const PtNodeParams DynamicPatriciaTrieWritingHelper::getPtNodeParamsForNewPtNode(
        const int parentPos, const int codePointCount, const int *const codePoints,
        const int probability) const {
    const bool isTerminal = probability != NOT_A_PROBABILITY;
    const bool hasMultipleChars = codePointCount > 1;
    const PatriciaTrieReadingUtils::NodeFlags newPtNodeFlags =
            PatriciaTrieReadingUtils::createAndGetFlags(false /* isBlacklisted */,
                    false /* isNotAWord */, isTerminal, false /* hasShortcutTargets */,
                    false /* hasBigrams */, hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
    return PtNodeParams(newPtNodeFlags, parentPos, codePointCount, codePoints, probability);
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
|
||||||
#include "utils/hash_map_compat.h"
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -62,18 +61,6 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
~DynamicPatriciaTrieWritingHelper() {}
|
~DynamicPatriciaTrieWritingHelper() {}
|
||||||
|
|
||||||
// Add a word to the dictionary. If the word already exists, update the probability.
|
|
||||||
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
|
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability,
|
|
||||||
bool *const outAddedNewUnigram);
|
|
||||||
|
|
||||||
// Add a bigram relation from word0Pos to word1Pos.
|
|
||||||
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
|
|
||||||
bool *const outAddedNewBigram);
|
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
|
||||||
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
|
||||||
|
|
||||||
void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
|
void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
|
||||||
const int unigramCount, const int bigramCount);
|
const int unigramCount, const int bigramCount);
|
||||||
|
|
||||||
|
@ -83,8 +70,6 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const PtNodeReader *const mPtNodeReader;
|
const PtNodeReader *const mPtNodeReader;
|
||||||
PtNodeWriter *const mPtNodeWriter;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
|
@ -92,35 +77,9 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
const bool mNeedsToDecay;
|
const bool mNeedsToDecay;
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
|
||||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
|
||||||
|
|
||||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
|
|
||||||
bool *const outAddedNewUnigram);
|
|
||||||
|
|
||||||
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
|
||||||
const int probability, const int *const codePoints, const int codePointCount);
|
|
||||||
|
|
||||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
|
||||||
const int nodeCodePointCount, const int probability);
|
|
||||||
|
|
||||||
bool reallocatePtNodeAndAddNewPtNodes(
|
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
|
||||||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
|
||||||
const int newNodeCodePointCount);
|
|
||||||
|
|
||||||
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
||||||
int *const outBigramCount);
|
int *const outBigramCount);
|
||||||
|
|
||||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
|
||||||
|
|
||||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
|
||||||
const int probability) const;
|
|
||||||
|
|
||||||
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
|
|
||||||
const int *const codePoints, const int probability) const;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
||||||
|
|
Loading…
Reference in New Issue