am 3fbc5ef1: Implement inserting new node into PtNode array.
* commit '3fbc5ef196bbe20b02be2ff11768e00a4f16ff4c': Implement inserting new node into PtNode array.
This commit is contained in:
commit
b41d66d718
4 changed files with 142 additions and 3 deletions
|
@ -70,6 +70,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos += mBuffer->getOriginalBufferSize();
|
mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
mPosOfLastForwardLinkField = mPos;
|
||||||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||||
// Follow the forward link.
|
// Follow the forward link.
|
||||||
mPos += forwardLinkPosition;
|
mPos += forwardLinkPosition;
|
||||||
|
|
|
@ -38,8 +38,8 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||||
mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer),
|
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
||||||
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
|
mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieReadingHelper() {}
|
~DynamicPatriciaTrieReadingHelper() {}
|
||||||
|
|
||||||
|
@ -62,6 +62,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mPrevTotalCodePointCount = 0;
|
mPrevTotalCodePointCount = 0;
|
||||||
mTotalNodeCount = 0;
|
mTotalNodeCount = 0;
|
||||||
mNodeArrayCount = 0;
|
mNodeArrayCount = 0;
|
||||||
|
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
nextNodeArray();
|
nextNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
|
@ -81,6 +82,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mPrevTotalCodePointCount = 0;
|
mPrevTotalCodePointCount = 0;
|
||||||
mTotalNodeCount = 1;
|
mTotalNodeCount = 1;
|
||||||
mNodeArrayCount = 1;
|
mNodeArrayCount = 1;
|
||||||
|
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -140,6 +142,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mTotalNodeCount = 0;
|
mTotalNodeCount = 0;
|
||||||
mNodeArrayCount = 0;
|
mNodeArrayCount = 0;
|
||||||
mPos = mNodeReader.getChildrenPos();
|
mPos = mNodeReader.getChildrenPos();
|
||||||
|
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
// Read children node array.
|
// Read children node array.
|
||||||
nextNodeArray();
|
nextNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
|
@ -158,12 +161,17 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mNodeArrayCount = 1;
|
mNodeArrayCount = 1;
|
||||||
mNodeCount = 1;
|
mNodeCount = 1;
|
||||||
mPos = mNodeReader.getParentPos();
|
mPos = mNodeReader.getParentPos();
|
||||||
|
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
} else {
|
} else {
|
||||||
mPos = NOT_A_DICT_POS;
|
mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the value currently held in mPosOfLastForwardLinkField.
// In the hunks visible here that member is assigned from mPos inside followForwardLink() and is
// reset to NOT_A_DICT_POS whenever the helper is repositioned, so callers should expect
// NOT_A_DICT_POS when no forward link field has been visited since the last repositioning.
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
    return mPosOfLastForwardLinkField;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||||
|
|
||||||
|
@ -177,6 +185,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
int mPrevTotalCodePointCount;
|
int mPrevTotalCodePointCount;
|
||||||
int mTotalNodeCount;
|
int mTotalNodeCount;
|
||||||
int mNodeArrayCount;
|
int mNodeArrayCount;
|
||||||
|
int mPosOfLastForwardLinkField;
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -27,6 +29,7 @@ namespace latinime {
|
||||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
||||||
|
int parentPos = NOT_A_VALID_WORD_POS;
|
||||||
while (!readingHelper->isEnd()) {
|
while (!readingHelper->isEnd()) {
|
||||||
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
||||||
if (!readingHelper->isMatchedCodePoint(0 /* index */,
|
if (!readingHelper->isMatchedCodePoint(0 /* index */,
|
||||||
|
@ -65,14 +68,20 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Advance to the children nodes.
|
// Advance to the children nodes.
|
||||||
|
parentPos = nodeReader->getNodePos();
|
||||||
readingHelper->readChildNode();
|
readingHelper->readChildNode();
|
||||||
}
|
}
|
||||||
if (readingHelper->isError()) {
|
if (readingHelper->isError()) {
|
||||||
// The dictionary is invalid.
|
// The dictionary is invalid.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: add at the last position of the node array.
|
int pos = readingHelper->getPosOfLastForwardLinkField();
|
||||||
|
// TODO: Remove.
|
||||||
return false;
|
return false;
|
||||||
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
|
probability, &pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
@ -97,4 +106,112 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
|
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
|
||||||
|
int pos = originalNode->getNodePos();
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||||
|
false /* isDeleted */);
|
||||||
|
int writingPos = originalNode->getNodePos();
|
||||||
|
// Update flags.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update moved position, which is stored in the parent position field.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(
|
||||||
|
mBuffer, movedPos, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write new node at writingPos.
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted,
|
||||||
|
const bool isNotAWord, const int parentPos, const int *const codePoints,
|
||||||
|
const int codePointCount, const int probability, const int childrenPos,
|
||||||
|
const int originalBigramListPos, const int originalShortcutListPos,
|
||||||
|
int *const writingPos) {
|
||||||
|
// Create node flags and write them.
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
||||||
|
probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
|
||||||
|
originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
|
||||||
|
3 /* childrenPositionFieldSize */);
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||||
|
writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write parent position
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(mBuffer, parentPos,
|
||||||
|
writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write code points
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
|
||||||
|
codePointCount, writingPos)) {
|
||||||
|
return false;;
|
||||||
|
}
|
||||||
|
// Write probability when the probability is a valid probability, which means this node is
|
||||||
|
// terminal.
|
||||||
|
if (probability != NOT_A_PROBABILITY) {
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
|
probability, writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write children position
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
|
childrenPos, writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
||||||
|
if (originalShortcutListPos != NOT_A_DICT_POS) {
|
||||||
|
int fromPos = originalShortcutListPos;
|
||||||
|
mShortcutPolicy->copyAllShortcuts(&fromPos, writingPos);
|
||||||
|
}
|
||||||
|
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||||
|
if (originalBigramListPos != NOT_A_DICT_POS) {
|
||||||
|
int fromPos = originalBigramListPos;
|
||||||
|
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
|
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
||||||
|
int *const forwardLinkFieldPos) {
|
||||||
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int writingPos = newPtNodeArrayPos;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
|
1 /* arraySize */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPos,
|
||||||
|
nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */,
|
||||||
|
NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -23,6 +23,7 @@ namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
class DynamicBigramListPolicy;
|
class DynamicBigramListPolicy;
|
||||||
|
class DynamicPatriciaTrieNodeReader;
|
||||||
class DynamicPatriciaTrieReadingHelper;
|
class DynamicPatriciaTrieReadingHelper;
|
||||||
class DynamicShortcutListPolicy;
|
class DynamicShortcutListPolicy;
|
||||||
|
|
||||||
|
@ -51,6 +52,17 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
||||||
|
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
||||||
|
const int movedPos);
|
||||||
|
|
||||||
|
bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos,
|
||||||
|
const int *const codePoints, const int codePointCount, const int probability,
|
||||||
|
const int childrenPos, const int originalBigramListPos,
|
||||||
|
const int originalShortcutListPos, int *const writingPos);
|
||||||
|
|
||||||
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
|
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
||||||
|
|
Loading…
Reference in a new issue