From 3fbc5ef196bbe20b02be2ff11768e00a4f16ff4c Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 6 Sep 2013 20:40:08 +0900 Subject: [PATCH] Implement inserting new node into PtNode array. Bug: 6669677 Change-Id: I0171476231181e41234dde76ac9061febb2e8c35 --- .../dynamic_patricia_trie_reading_helper.cpp | 1 + .../dynamic_patricia_trie_reading_helper.h | 13 +- .../dynamic_patricia_trie_writing_helper.cpp | 119 +++++++++++++++++- .../dynamic_patricia_trie_writing_helper.h | 12 ++ 4 files changed, 142 insertions(+), 3 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index a2712c34c..a0b5be6a4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -70,6 +70,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() { if (usesAdditionalBuffer) { mPos += mBuffer->getOriginalBufferSize(); } + mPosOfLastForwardLinkField = mPos; if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { // Follow the forward link. mPos += forwardLinkPosition; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index b108ed5fb..db1c392bb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -38,8 +38,8 @@ class DynamicPatriciaTrieReadingHelper { const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), - mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer), - mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {} + mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), + mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {} ~DynamicPatriciaTrieReadingHelper() {} @@ -62,6 +62,7 @@ class DynamicPatriciaTrieReadingHelper { mPrevTotalCodePointCount = 0; mTotalNodeCount = 0; mNodeArrayCount = 0; + mPosOfLastForwardLinkField = NOT_A_DICT_POS; nextNodeArray(); if (!isEnd()) { fetchNodeInfo(); @@ -81,6 +82,7 @@ class DynamicPatriciaTrieReadingHelper { mPrevTotalCodePointCount = 0; mTotalNodeCount = 1; mNodeArrayCount = 1; + mPosOfLastForwardLinkField = NOT_A_DICT_POS; fetchNodeInfo(); } } @@ -140,6 +142,7 @@ class DynamicPatriciaTrieReadingHelper { mTotalNodeCount = 0; mNodeArrayCount = 0; mPos = mNodeReader.getChildrenPos(); + mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextNodeArray(); if (!isEnd()) { @@ -158,12 +161,17 @@ class DynamicPatriciaTrieReadingHelper { mNodeArrayCount = 1; mNodeCount = 1; mPos = mNodeReader.getParentPos(); + mPosOfLastForwardLinkField = NOT_A_DICT_POS; fetchNodeInfo(); } else { mPos = NOT_A_DICT_POS; } } + AK_FORCE_INLINE int getPosOfLastForwardLinkField() const { + return mPosOfLastForwardLinkField; + } + private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); @@ -177,6 +185,7 @@ class DynamicPatriciaTrieReadingHelper { int mPrevTotalCodePointCount; int mTotalNodeCount; int mNodeArrayCount; + int mPosOfLastForwardLinkField; const BufferWithExtendableBuffer *const mBuffer; DynamicPatriciaTrieNodeReader mNodeReader; int mMergedNodeCodePoints[MAX_WORD_LENGTH]; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a4793e167..03dc57628 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -19,7 +19,9 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" namespace latinime { @@ -27,6 +29,7 @@ namespace latinime { bool DynamicPatriciaTrieWritingHelper::addUnigramWord( DynamicPatriciaTrieReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability) { + int parentPos = NOT_A_VALID_WORD_POS; while (!readingHelper->isEnd()) { const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); if (!readingHelper->isMatchedCodePoint(0 /* index */, @@ -65,14 +68,20 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( return false; } // Advance to the children nodes. + parentPos = nodeReader->getNodePos(); readingHelper->readChildNode(); } if (readingHelper->isError()) { // The dictionary is invalid. return false; } - // TODO: add at the last position of the node array. + int pos = readingHelper->getPosOfLastForwardLinkField(); + // TODO: Remove. return false; + return createAndInsertNodeIntoPtNodeArray(parentPos, + wordCodePoints + readingHelper->getPrevTotalCodePointCount(), + codePointCount - readingHelper->getPrevTotalCodePointCount(), + probability, &pos); } bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, @@ -97,4 +106,112 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con return false; } +bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( + const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) { + int pos = originalNode->getNodePos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */); + int writingPos = originalNode->getNodePos(); + // Update flags. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent position field. + if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition( + mBuffer, movedPos, &writingPos)) { + return false; + } + return true; +} + +// Write new node at writingPos. +bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted, + const bool isNotAWord, const int parentPos, const int *const codePoints, + const int codePointCount, const int probability, const int childrenPos, + const int originalBigramListPos, const int originalShortcutListPos, + int *const writingPos) { + // Create node flags and write them. + const PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, + probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS, + originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1, + 3 /* childrenPositionFieldSize */); + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + writingPos)) { + return false; + } + // Write parent position + if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(mBuffer, parentPos, + writingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints, + codePointCount, writingPos)) { + return false;; + } + // Write probability when the probability is a valid probability, which means this node is + // terminal. + if (probability != NOT_A_PROBABILITY) { + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + probability, writingPos)) { + return false; + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + childrenPos, writingPos)) { + return false; + } + // Copy shortcut list when the originalShortcutListPos is valid dictionary position. + if (originalShortcutListPos != NOT_A_DICT_POS) { + int fromPos = originalShortcutListPos; + mShortcutPolicy->copyAllShortcuts(&fromPos, writingPos); + } + // Copy bigram list when the originalBigramListPos is valid dictionary position. + if (originalBigramListPos != NOT_A_DICT_POS) { + int fromPos = originalBigramListPos; + if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) { + return false; + } + } + return true; +} + +bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, + const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, + int *const forwardLinkFieldPos) { + const int newPtNodeArrayPos = mBuffer->getTailPosition(); + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + newPtNodeArrayPos, forwardLinkFieldPos)) { + return false; + } + int writingPos = newPtNodeArrayPos; + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, + 1 /* arraySize */, &writingPos)) { + return false; + } + if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPos, + nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */, + NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, + &writingPos)) { + return false; + } + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, + NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index f8165fc3d..524f361d3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -23,6 +23,7 @@ namespace latinime { class BufferWithExtendableBuffer; class DynamicBigramListPolicy; +class DynamicPatriciaTrieNodeReader; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; @@ -51,6 +52,17 @@ class DynamicPatriciaTrieWritingHelper { BufferWithExtendableBuffer *const mBuffer; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; + + bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, + const int movedPos); + + bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + const int childrenPos, const int originalBigramListPos, + const int originalShortcutListPos, int *const writingPos); + + bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, + const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */