From c481d0556fe62cdaaf6d6d8cf037a33ce665f44c Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 25 Oct 2013 11:18:55 -0700 Subject: [PATCH] Refactoring: Introduce PtNode and PtNodeReader. To handle multiple dictionary formats in helpers. Bug: 11073222 Change-Id: Iaef7be08534f9010e837ffcf8c8292b174b64d2b --- .../bigram/dynamic_bigram_list_policy.cpp | 19 +- .../structure/pt_common/pt_node_params.h | 185 ++++++++++++++++++ .../structure/pt_common/pt_node_reader.h | 39 ++++ ...namic_patricia_trie_gc_event_listeners.cpp | 60 +++--- ...dynamic_patricia_trie_gc_event_listeners.h | 13 +- .../v3/dynamic_patricia_trie_node_reader.cpp | 103 ++++------ .../v3/dynamic_patricia_trie_node_reader.h | 119 +---------- .../v3/dynamic_patricia_trie_policy.cpp | 102 +++++----- .../v3/dynamic_patricia_trie_policy.h | 3 + .../dynamic_patricia_trie_reading_helper.cpp | 32 +-- .../v3/dynamic_patricia_trie_reading_helper.h | 100 ++++------ .../dynamic_patricia_trie_writing_helper.cpp | 164 ++++++++-------- .../v3/dynamic_patricia_trie_writing_helper.h | 21 +- .../structure/v4/ver4_dict_constants.cpp | 2 + .../structure/v4/ver4_dict_constants.h | 2 + 15 files changed, 528 insertions(+), 436 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index d97501265..83a32fb0b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( } const int bigramTargetNodePos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); 
- nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); - if (nodeReader.isDeleted() || !nodeReader.isTerminal() + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos( + bigramTargetNodePos)); + if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal() || bigramTargetNodePos == NOT_A_DICT_POS) { // The target is no longer valid terminal. Invalidate the current bigram entry. if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, @@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( if (originalBigramPos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - int currentPos = originalBigramPos; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + int currentPos = NOT_A_DICT_POS; int bigramLinkCount = 0; - while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { - currentPos = nodeReader.getBigramLinkedNodePos(); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); + int bigramLinkedNodePos = originalBigramPos; + do { + currentPos = bigramLinkedNodePos; + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos)); + bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos(); bigramLinkCount++; if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { AKLOGE("Bigram link is invalid. 
start position: %d", originalBigramPos); ASSERT(false); return NOT_A_DICT_POS; } - } + bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos(); + } while (bigramLinkedNodePos != NOT_A_DICT_POS); return currentPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h new file mode 100644 index 000000000..7bdd829cd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_PARAMS_H +#define LATINIME_PT_NODE_PARAMS_H + +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +// This class has information of a PtNode. This class is immutable. +class PtNodeParams { + public: + // Invalid PtNode. 
+ PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), + mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS), + mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), + mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mSiblingPos(NOT_A_DICT_POS) {} + + PtNodeParams(const PtNodeParams& ptNodeParams) + : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags), + mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount), + mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos), + mTerminalId(ptNodeParams.mTerminalId), + mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos), + mProbability(ptNodeParams.mProbability), + mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos), + mChildrenPos(ptNodeParams.mChildrenPos), + mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos), + mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos), + mSiblingPos(ptNodeParams.mSiblingPos) { + memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); + } + + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int parentPos, const int codePointCount, const int *const codePoints, + const int probabilityFieldPos, const int probability, const int childrenPosFieldPos, + const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos, + const int bigramPos, const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), + mProbabilityFieldPos(probabilityFieldPos), mProbability(probability), + mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), + 
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos), + mBigramPos(bigramPos), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + + AK_FORCE_INLINE bool isValid() const { + return mCodePointCount > 0; + } + + // Head position of the PtNode + AK_FORCE_INLINE int getHeadPos() const { + return mHeadPos; + } + + // Flags + AK_FORCE_INLINE bool isDeleted() const { + return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); + } + + AK_FORCE_INLINE bool hasChildren() const { + return mChildrenPos != NOT_A_DICT_POS; + } + + AK_FORCE_INLINE bool isTerminal() const { + return PatriciaTrieReadingUtils::isTerminal(mFlags); + } + + AK_FORCE_INLINE bool isBlacklisted() const { + return PatriciaTrieReadingUtils::isBlacklisted(mFlags); + } + + AK_FORCE_INLINE bool isNotAWord() const { + return PatriciaTrieReadingUtils::isNotAWord(mFlags); + } + + // Parent node position + AK_FORCE_INLINE int getParentPos() const { + return mParentPos; + } + + // Number of code points + AK_FORCE_INLINE uint8_t getCodePointCount() const { + return mCodePointCount; + } + + AK_FORCE_INLINE const int *getCodePoints() const { + return mCodePoints; + } + + // Terminal id + AK_FORCE_INLINE int getTerminalIdFieldPos() const { + return mTerminalIdFieldPos; + } + + AK_FORCE_INLINE int getTerminalId() const { + return mTerminalId; + } + + // Probability + AK_FORCE_INLINE int getProbabilityFieldPos() const { + return mProbabilityFieldPos; + } + + AK_FORCE_INLINE int getProbability() const { + return mProbability; + } + + // Children PtNode array position + AK_FORCE_INLINE int getChildrenPosFieldPos() const { + return mChildrenPosFieldPos; + } + + AK_FORCE_INLINE int getChildrenPos() const { + return mChildrenPos; + } + + // Bigram linked node position. 
+ AK_FORCE_INLINE int getBigramLinkedNodePos() const { + return mBigramLinkedNodePos; + } + + // Shortcutlist position + AK_FORCE_INLINE int getShortcutPos() const { + return mShortcutPos; + } + + // Bigrams position + AK_FORCE_INLINE int getBigramsPos() const { + return mBigramPos; + } + + // Sibling node position + AK_FORCE_INLINE int getSiblingNodePos() const { + return mSiblingPos; + } + + private: + // This class has a public copy constructor to be used as a return value. + + // Disallowing the assignment operator. + PtNodeParams &operator=(PtNodeParams &ptNodeParams); + + const int mHeadPos; + const PatriciaTrieReadingUtils::NodeFlags mFlags; + const int mParentPos; + const uint8_t mCodePointCount; + int mCodePoints[MAX_WORD_LENGTH]; + const int mTerminalIdFieldPos; + const int mTerminalId; + const int mProbabilityFieldPos; + const int mProbability; + const int mChildrenPosFieldPos; + const int mChildrenPos; + const int mBigramLinkedNodePos; + const int mShortcutPos; + const int mBigramPos; + const int mSiblingPos; +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_PARAMS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h new file mode 100644 index 000000000..c6b2a8bed --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_READER_H +#define LATINIME_PT_NODE_READER_H + +#include "defines.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" + +namespace latinime { + +// Interface class used to read PtNode information. +class PtNodeReader { + public: + virtual ~PtNodeReader() {} + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0; + + protected: + PtNodeReader() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeReader); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index 126b7681e..db4e86da1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -17,22 +17,22 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { bool DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. 
- bool isUselessPtNode = !node->isTerminal(); - if (node->isTerminal() && mIsDecayingDict) { + bool isUselessPtNode = !ptNodeParams->isTerminal(); + if (ptNodeParams->isTerminal() && mIsDecayingDict) { const int newProbability = - ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(), + ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(), mHeaderPolicy); - int writingPos = node->getProbabilityFieldPos(); + int writingPos = ptNodeParams->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { @@ -44,9 +44,9 @@ bool DynamicPatriciaTrieGcEventListeners } if (mChildrenValue > 0) { isUselessPtNode = false; - } else if (node->isTerminal()) { + } else if (ptNodeParams->isTerminal()) { // Remove children as all children are useless. - int writingPos = node->getChildrenPosFieldPos(); + int writingPos = ptNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { return false; @@ -54,12 +54,12 @@ bool DynamicPatriciaTrieGcEventListeners } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. 
- if (!mWritingHelper->markNodeAsDeleted(node)) { + if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) { return false; } } else { mValueStack.back() += 1; - if (node->isTerminal()) { + if (ptNodeParams->isTerminal()) { mValidUnigramCount += 1; } } @@ -67,10 +67,9 @@ bool DynamicPatriciaTrieGcEventListeners } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (!node->isDeleted()) { - int pos = node->getBigramsPos(); + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { + if (!ptNodeParams->isDeleted()) { + int pos = ptNodeParams->getBigramsPos(); if (pos != NOT_A_DICT_POS) { int bigramEntryCount = 0; if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, @@ -117,31 +116,29 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo // Write valid PtNode to buffer and memorize mapping from the old position to the new position. bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { - if (node->isDeleted()) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { + if (ptNodeParams->isDeleted()) { // Current PtNode is not written in new buffer because it has been deleted. mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( - node->getHeadPos(), NOT_A_DICT_POS)); + ptNodeParams->getHeadPos(), NOT_A_DICT_POS)); return true; } int writingPos = mBufferToWrite->getTailPosition(); mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( - node->getHeadPos(), writingPos)); + ptNodeParams->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. 
- return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, - node->getParentPos(), nodeCodePoints, node->getCodePointCount(), - node->getProbability(), &writingPos); + return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams, + ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(), + ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos); } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) { + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { // Updates parent position. - int parentPos = node->getParentPos(); + int parentPos = ptNodeParams->getParentPos(); if (parentPos != NOT_A_DICT_POS) { DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); @@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField parentPos = it->second; } } - int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + int writingPos = ptNodeParams->getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; // Write updated parent offset. if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, - parentPos, node->getHeadPos(), &writingPos)) { + parentPos, ptNodeParams->getHeadPos(), &writingPos)) { return false; } // Updates children position. 
- int childrenPos = node->getChildrenPos(); + int childrenPos = ptNodeParams->getChildrenPos(); if (childrenPos != NOT_A_DICT_POS) { DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); @@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField childrenPos = it->second; } } - writingPos = node->getChildrenPosFieldPos(); + writingPos = ptNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, childrenPos, &writingPos)) { return false; } // Updates bigram target PtNode positions in the bigram list. - int bigramsPos = node->getBigramsPos(); + int bigramsPos = ptNodeParams->getBigramsPos(); if (bigramsPos != NOT_A_DICT_POS) { int bigramEntryCount; if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, @@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField } mBigramCount += bigramEntryCount; } - if (node->isTerminal()) { + if (ptNodeParams->isTerminal()) { mUnigramCount++; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index ab59cc645..cfe3c145c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -30,6 +30,7 @@ namespace latinime { class DictionaryHeaderStructurePolicy; +class PtNodeParams; class DynamicPatriciaTrieGcEventListeners { public: @@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const 
nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getValidUnigramCount() const { return mValidUnigramCount; @@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getValidBigramEntryCount() const { return mValidBigramEntryCount; @@ -133,8 +132,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail(); - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); @@ -167,8 +165,7 @@ class DynamicPatriciaTrieGcEventListeners { bool onReadingPtNodeArrayTail() { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints); + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); int getUnigramCount() const { return mUnigramCount; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp index 4fd2484e1..3393ce662 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.cpp @@ -18,107 +18,90 @@ #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include 
"suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { -void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { +const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const { if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { // Reading invalid position because of bug or broken dictionary. AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", ptNodePos, mBuffer->getTailPosition()); ASSERT(false); - invalidatePtNodeInfo(); - return; + return PtNodeParams(); } const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); int pos = ptNodePos; - mHeadPos = ptNodePos; + const int headPos = ptNodePos; if (usesAdditionalBuffer) { pos -= mBuffer->getOriginalBufferSize(); } - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPosOffset = DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, &pos); - mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); - if (outCodePoints != 0) { - mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); - } else { - mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - dictBuf, mFlags, MAX_WORD_LENGTH, &pos); - } - if (isTerminal()) { - mProbabilityFieldPos = pos; + const int parentPos = + DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos); + int codePoints[MAX_WORD_LENGTH]; + const int codePonitCount = 
PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); + int probability = NOT_A_PROBABILITY; + int probabilityFieldPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + probabilityFieldPos = pos; if (usesAdditionalBuffer) { - mProbabilityFieldPos += mBuffer->getOriginalBufferSize(); + probabilityFieldPos += mBuffer->getOriginalBufferSize(); } - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); - } else { - mProbabilityFieldPos = NOT_A_DICT_POS; - mProbability = NOT_A_PROBABILITY; + probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); } - mChildrenPosFieldPos = pos; + int childrenPosFieldPos = pos; if (usesAdditionalBuffer) { - mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); + childrenPosFieldPos += mBuffer->getOriginalBufferSize(); } - mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( dictBuf, &pos); - if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { - mChildrenPos += mBuffer->getOriginalBufferSize(); + if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { + childrenPos += mBuffer->getOriginalBufferSize(); } - if (mSiblingPos == NOT_A_DICT_POS) { - if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { - mBigramLinkedNodePos = mChildrenPos; - } else { - mBigramLinkedNodePos = NOT_A_DICT_POS; + int newBigramLinkedNodePos = bigramLinkedNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + newBigramLinkedNodePos = childrenPos; } } if (usesAdditionalBuffer) { pos += mBuffer->getOriginalBufferSize(); } - if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { - mShortcutPos = pos; + int shortcutsPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { + shortcutsPos = pos; 
mShortcutsPolicy->skipAllShortcuts(&pos); - } else { - mShortcutPos = NOT_A_DICT_POS; } - if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { - mBigramPos = pos; + int bigramsPos = NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::hasBigrams(flags)) { + bigramsPos = pos; mBigramsPolicy->skipAllBigrams(&pos); - } else { - mBigramPos = NOT_A_DICT_POS; } - // Update siblingPos if needed. - if (mSiblingPos == NOT_A_DICT_POS) { + int newSiblingNodePos = siblingNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { // Sibling position is the tail position of current node. - mSiblingPos = pos; + newSiblingNodePos = pos; } // Read destination node if the read node is a moved node. - if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { // The destination position is stored at the same place as the parent position. - fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount, - outCodePoints); + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos, + newBigramLinkedNodePos); + } else { + return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, + probabilityFieldPos, probability, childrenPosFieldPos, childrenPos, + newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos); } } -void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() { - mHeadPos = NOT_A_DICT_POS; - mFlags = 0; - mParentPos = NOT_A_DICT_POS; - mCodePointCount = 0; - mProbabilityFieldPos = NOT_A_DICT_POS; - mProbability = NOT_A_PROBABILITY; - mChildrenPosFieldPos = NOT_A_DICT_POS; - mChildrenPos = NOT_A_DICT_POS; - mBigramLinkedNodePos = NOT_A_DICT_POS; - mShortcutPos = NOT_A_DICT_POS; - mBigramPos = NOT_A_DICT_POS; - mSiblingPos = NOT_A_DICT_POS; -} - } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h index fac078d0a..b5abffeda 
100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -20,8 +20,8 @@ #include #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" namespace latinime { @@ -33,106 +33,19 @@ class DictionaryShortcutsStructurePolicy; * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved * node and reads node attributes. */ -class DynamicPatriciaTrieNodeReader { +class DynamicPatriciaTrieNodeReader : public PtNodeReader { public: DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mBuffer(buffer), mBigramsPolicy(bigramsPolicy), - mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS), - mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), - mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), - mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), - mSiblingPos(NOT_A_DICT_POS) {} + mShortcutsPolicy(shortcutsPolicy) {} ~DynamicPatriciaTrieNodeReader() {} - // Reads PtNode information from dictionary buffer and updates members with the information. 
- AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) { - fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos , - 0 /* maxCodePointCount */, 0 /* outCodePoints */); - } - - AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints( - const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { - mSiblingPos = NOT_A_DICT_POS; - mBigramLinkedNodePos = NOT_A_DICT_POS; - fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints); - } - - // HeadPos is different from NodePos when the current PtNode is a moved PtNode. - AK_FORCE_INLINE int getHeadPos() const { - return mHeadPos; - } - - // Flags - AK_FORCE_INLINE bool isDeleted() const { - return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); - } - - AK_FORCE_INLINE bool hasChildren() const { - return mChildrenPos != NOT_A_DICT_POS; - } - - AK_FORCE_INLINE bool isTerminal() const { - return PatriciaTrieReadingUtils::isTerminal(mFlags); - } - - AK_FORCE_INLINE bool isBlacklisted() const { - return PatriciaTrieReadingUtils::isBlacklisted(mFlags); - } - - AK_FORCE_INLINE bool isNotAWord() const { - return PatriciaTrieReadingUtils::isNotAWord(mFlags); - } - - // Parent node position - AK_FORCE_INLINE int getParentPos() const { - return mParentPos; - } - - // Number of code points - AK_FORCE_INLINE uint8_t getCodePointCount() const { - return mCodePointCount; - } - - // Probability - AK_FORCE_INLINE int getProbabilityFieldPos() const { - return mProbabilityFieldPos; - } - - AK_FORCE_INLINE int getProbability() const { - return mProbability; - } - - // Children PtNode array position - AK_FORCE_INLINE int getChildrenPosFieldPos() const { - return mChildrenPosFieldPos; - } - - AK_FORCE_INLINE int getChildrenPos() const { - return mChildrenPos; - } - - // Bigram linked node position. 
- AK_FORCE_INLINE int getBigramLinkedNodePos() const { - return mBigramLinkedNodePos; - } - - // Shortcutlist position - AK_FORCE_INLINE int getShortcutPos() const { - return mShortcutPos; - } - - // Bigrams position - AK_FORCE_INLINE int getBigramsPos() const { - return mBigramPos; - } - - // Sibling node position - AK_FORCE_INLINE int getSiblingNodePos() const { - return mSiblingPos; + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, + NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */); } private: @@ -141,23 +54,9 @@ class DynamicPatriciaTrieNodeReader { const BufferWithExtendableBuffer *const mBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - int mHeadPos; - DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; - int mParentPos; - uint8_t mCodePointCount; - int mProbabilityFieldPos; - int mProbability; - int mChildrenPosFieldPos; - int mChildrenPos; - int mBigramLinkedNodePos; - int mShortcutPos; - int mBigramPos; - int mSiblingPos; - void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, - const int maxCodePointCount, int *const outCodePoints); - - void invalidatePtNodeInfo(); + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int siblingNodePos, const int bigramLinkedNodePos) const; }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index b9d4739da..50882b3e9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -50,24 +50,27 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { - bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } + bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); if (isTerminal && mHeaderPolicy.isDecayingDict()) { // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a // valid terminal DicNode. 
- isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) + isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY) != NOT_A_PROBABILITY; } - childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), - nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, - nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), - nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); - readingHelper.readNextSiblingNode(); + childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.hasChildren(), + ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(), + ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + readingHelper.readNextSiblingNode(ptNodeParams); } } @@ -77,29 +80,33 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun // This method traverses parent nodes from the terminal by following parent pointers; thus, // node code points are stored in the buffer in the reverse order. int reverseCodePoints[maxCodePointCount]; - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); // First, read the terminal node and get its probability. readingHelper.initWithPtNodePos(ptNodePos); - if (!readingHelper.isValidTerminalNode()) { + + const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams()); + if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) { // Node at the ptNodePos is not a valid terminal node. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store terminal node probability. 
- *outUnigramProbability = readingHelper.getNodeReader()->getProbability(); + *outUnigramProbability = terminalPtNodeParams.getProbability(); // Then, following parent node link to the dictionary root and fetch node code points. + int totalCodePointCount = 0; while (!readingHelper.isEnd()) { - if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams); + if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { // The ptNodePos is not a valid terminal node position in the dictionary. *outUnigramProbability = NOT_A_PROBABILITY; return 0; } // Store node code points to buffer in the reverse order. - readingHelper.fetchMergedNodeCodePointsInReverseOrder( + readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); // Follow parent node toward the root node. - readingHelper.readParentNode(); + readingHelper.readParentNode(ptNodeParams); } if (readingHelper.isError()) { // The node position or the dictionary is invalid. @@ -107,11 +114,10 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return 0; } // Reverse the stored code points to output them. - const int codePointCount = readingHelper.getTotalCodePointCount(); - for (int i = 0; i < codePointCount; ++i) { - outCodePoints[i] = reverseCodePoints[codePointCount - i - 1]; + for (int i = 0; i < totalCodePointCount; ++i) { + outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; } - return codePointCount; + return totalCodePointCount; } int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, @@ -120,39 +126,42 @@ int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const for (int i = 0; i < length; ++i) { searchCodePoints[i] = forceLowerCaseSearch ? 
CharUtils::toLowerCase(inWord[i]) : inWord[i]; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { + const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); - if (readingHelper.getTotalCodePointCount() > length - || !readingHelper.isMatchedCodePoint(0 /* index */, + if (readingHelper.getTotalCodePointCount(ptNodeParams) > length + || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */, searchCodePoints[matchedCodePointCount])) { // Current node has too many code points or its first code point is different from // target code point. Skip this node and read the next sibling node. - readingHelper.readNextSiblingNode(); + readingHelper.readNextSiblingNode(ptNodeParams); continue; } // Check following merged node code points. - const int nodeCodePointCount = nodeReader->getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { - if (!readingHelper.isMatchedCodePoint( + if (!readingHelper.isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) { // Different code point is found. The given word is not included in the dictionary. return NOT_A_DICT_POS; } } // All characters are matched. - if (length == readingHelper.getTotalCodePointCount()) { + if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) { // Terminal position is found. 
- return nodeReader->getHeadPos(); + return ptNodeParams.getHeadPos(); } - if (!nodeReader->hasChildren()) { + if (!ptNodeParams.hasChildren()) { return NOT_A_DICT_POS; } // Advance to the children nodes. - readingHelper.readChildNode(); + readingHelper.readChildNode(ptNodeParams); } // If we already traversed the tree further than the word is long, there means // there was no match (or we would have found it). @@ -179,39 +188,33 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return nodeReader.getShortcutPos(); + return ptNodeParams.getShortcutPos(); } int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; } - 
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); - if (nodeReader.isDeleted()) { + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { return NOT_A_DICT_POS; } - return nodeReader.getBigramsPos(); + return ptNodeParams.getBigramsPos(); } bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, @@ -225,8 +228,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int AKLOGE("The dictionary is too large to dynamically update."); return false; } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, - getBigramsStructurePolicy(), getShortcutsStructurePolicy()); + DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 6a7f91ef6..7a81a9c0a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -22,6 +22,7 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 
#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -44,6 +45,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mShortcutListPolicy(&mBufferWithExtendableBuffer), mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), + mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} @@ -114,6 +116,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { BufferWithExtendableBuffer mBufferWithExtendableBuffer; DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; + DynamicPatriciaTrieNodeReader mNodeReader; int mUnigramCount; int mBigramCount; int mNeedsToDecayForTesting; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index f3410affc..398ff21cf 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -17,6 +17,8 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" namespace latinime { @@ -37,22 +39,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa return false; } while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); 
+ if (!ptNodeParams.isValid()) { + break; + } if (!alreadyVisitedChildren) { - if (mNodeReader.hasChildren()) { + if (ptNodeParams.hasChildren()) { // Move to the first child. - if (!listener->onDescend(mNodeReader.getChildrenPos())) { + if (!listener->onDescend(ptNodeParams.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(); + readChildNode(ptNodeParams); } else { alreadyVisitedChildren = true; } } else { - if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { + if (!listener->onVisitingPtNode(&ptNodeParams)) { return false; } - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. @@ -101,10 +107,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor } pushReadingStateToStack(); while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } if (alreadyVisitedAllPtNodesInArray) { if (alreadyVisitedChildren) { // Move to next sibling PtNode's children. - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { // Return to the parent PTNode. if (!listener->onAscend()) { @@ -120,13 +130,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor alreadyVisitedChildren = false; } } else { - if (mNodeReader.hasChildren()) { + if (ptNodeParams.hasChildren()) { // Move to the first child. - if (!listener->onDescend(mNodeReader.getChildrenPos())) { + if (!listener->onDescend(ptNodeParams.getChildrenPos())) { return false; } pushReadingStateToStack(); - readChildNode(); + readChildNode(ptNodeParams); // Push state to return the head of PtNode array. 
pushReadingStateToStack(); alreadyVisitedAllPtNodesInArray = false; @@ -136,10 +146,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor } } } else { - if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { + if (!listener->onVisitingPtNode(&ptNodeParams)) { return false; } - readNextSiblingNode(); + readNextSiblingNode(ptNodeParams); if (isEnd()) { if (!listener->onReadingPtNodeArrayTail()) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h index f8d32c9cd..1e9218e58 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h @@ -21,9 +21,8 @@ #include #include "defines.h" -#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" -#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" -#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" namespace latinime { @@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy; * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop. */ +// TODO: Move to pt_common. class DynamicPatriciaTrieReadingHelper { public: class TraversingEventListener { @@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper { virtual bool onReadingPtNodeArrayTail() = 0; // Returns whether the event handling was succeeded or not. 
- virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, - const int *const nodeCodePoints) = 0; + virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0; protected: TraversingEventListener() {}; @@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper { }; DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, - const DictionaryBigramsStructurePolicy *const bigramsPolicy, - const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) + const PtNodeReader *const ptNodeReader) : mIsError(false), mReadingState(), mBuffer(buffer), - mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} + mPtNodeReader(ptNodeReader), mReadingStateStack() {} ~DynamicPatriciaTrieReadingHelper() {} @@ -90,9 +88,6 @@ class DynamicPatriciaTrieReadingHelper { mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextPtNodeArray(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } } @@ -110,20 +105,23 @@ class DynamicPatriciaTrieReadingHelper { mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); - fetchPtNodeInfo(); } } - AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const { - return &mNodeReader; + AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const { + if (isEnd()) { + return PtNodeParams(); + } + return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos); } - AK_FORCE_INLINE bool isValidTerminalNode() const { - return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal(); + AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const { + return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal(); } - AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const { - return mMergedNodeCodePoints[index] == codePoint; + AK_FORCE_INLINE bool isMatchedCodePoint(const 
PtNodeParams &ptNodeParams, const int index, + const int codePoint) const { + return ptNodeParams.getCodePoints()[index] == codePoint; } // Return code point count exclude the last read node's code points. @@ -132,68 +130,56 @@ class DynamicPatriciaTrieReadingHelper { } // Return code point count include the last read node's code points. - AK_FORCE_INLINE int getTotalCodePointCount() const { + AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const { return mReadingState.mTotalCodePointCountSinceInitialization - + mNodeReader.getCodePointCount(); + + ptNodeParams.getCodePointCount(); } - AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( + AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams, const int index, int *const outCodePoints) const { - const int nodeCodePointCount = mNodeReader.getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); + const int *const nodeCodePoints = ptNodeParams.getCodePoints(); for (int i = 0; i < nodeCodePointCount; ++i) { - outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i]; + outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i]; } } - AK_FORCE_INLINE const int *getMergedNodeCodePoints() const { - return mMergedNodeCodePoints; - } - - AK_FORCE_INLINE void readNextSiblingNode() { + AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) { mReadingState.mRemainingPtNodeCountInThisArray -= 1; - mReadingState.mPos = mNodeReader.getSiblingNodePos(); + mReadingState.mPos = ptNodeParams.getSiblingNodePos(); if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { // All nodes in the current node array have been read. followForwardLink(); - if (!isEnd()) { - fetchPtNodeInfo(); - } - } else { - fetchPtNodeInfo(); } } // Read the first child node of the current node. 
- AK_FORCE_INLINE void readChildNode() { - if (mNodeReader.hasChildren()) { + AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) { + if (ptNodeParams.hasChildren()) { mReadingState.mTotalCodePointCountSinceInitialization += - mNodeReader.getCodePointCount(); + ptNodeParams.getCodePointCount(); mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; - mReadingState.mPos = mNodeReader.getChildrenPos(); + mReadingState.mPos = ptNodeParams.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. nextPtNodeArray(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } else { mReadingState.mPos = NOT_A_DICT_POS; } } // Read the parent node of the current node. - AK_FORCE_INLINE void readParentNode() { - if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { + AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) { + if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) { mReadingState.mTotalCodePointCountSinceInitialization += - mNodeReader.getCodePointCount(); + ptNodeParams.getCodePointCount(); mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; mReadingState.mRemainingPtNodeCountInThisArray = 1; - mReadingState.mPos = mNodeReader.getParentPos(); + mReadingState.mPos = ptNodeParams.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; - fetchPtNodeInfo(); } else { mReadingState.mPos = NOT_A_DICT_POS; } @@ -207,12 +193,6 @@ class DynamicPatriciaTrieReadingHelper { return mReadingState.mPosOfThisPtNodeArrayHead; } - AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { - if (!isEnd()) { - fetchPtNodeInfo(); - } - } - bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( @@ -253,24 +233,13 @@ class 
DynamicPatriciaTrieReadingHelper { bool mIsError; PtNodeReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; - DynamicPatriciaTrieNodeReader mNodeReader; - int mMergedNodeCodePoints[MAX_WORD_LENGTH]; + const PtNodeReader *const mPtNodeReader; std::vector mReadingStateStack; void nextPtNodeArray(); void followForwardLink(); - AK_FORCE_INLINE void fetchPtNodeInfo() { - mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos, - MAX_WORD_LENGTH, mMergedNodeCodePoints); - if (mNodeReader.getCodePointCount() <= 0) { - // Empty node is not allowed. - mIsError = true; - mReadingState.mPos = NOT_A_DICT_POS; - } - } - AK_FORCE_INLINE void pushReadingStateToStack() { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); @@ -288,9 +257,6 @@ class DynamicPatriciaTrieReadingHelper { } else { mReadingState = mReadingStateStack.back(); mReadingStateStack.pop_back(); - if (!isEnd()) { - fetchPtNodeInfo(); - } } } }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index d856c50f4..05caaebac 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -41,24 +41,26 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { + const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); + if (!ptNodeParams.isValid()) { + break; + } const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); - if (!readingHelper->isMatchedCodePoint(0 /* index */, + if (!readingHelper->isMatchedCodePoint(ptNodeParams, 
0 /* index */, wordCodePoints[matchedCodePointCount])) { // The first code point is different from target code point. Skip this node and read // the next sibling node. - readingHelper->readNextSiblingNode(); + readingHelper->readNextSiblingNode(ptNodeParams); continue; } // Check following merged node code points. - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader(); - const int nodeCodePointCount = nodeReader->getCodePointCount(); + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { const int nextIndex = matchedCodePointCount + j; - if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, + if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; - return reallocatePtNodeAndAddNewPtNodes(nodeReader, - readingHelper->getMergedNodeCodePoints(), j, + return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), wordCodePoints + matchedCodePointCount, @@ -66,20 +68,19 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( } } // All characters are matched. 
- if (codePointCount == readingHelper->getTotalCodePointCount()) { - return setPtNodeProbability(nodeReader, probability, - readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); + if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { + return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram); } - if (!nodeReader->hasChildren()) { + if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; - return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, + return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), - wordCodePoints + readingHelper->getTotalCodePointCount(), - codePointCount - readingHelper->getTotalCodePointCount()); + wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), + codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } // Advance to the children nodes. - parentPos = nodeReader->getHeadPos(); - readingHelper->readChildNode(); + parentPos = ptNodeParams.getHeadPos(); + readingHelper->readChildNode(ptNodeParams); } if (readingHelper->isError()) { // The dictionary is invalid. @@ -95,26 +96,24 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool *const outAddedNewBigram) { - int mMergedNodeCodePoints[MAX_WORD_LENGTH]; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, - mMergedNodeCodePoints); + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); // Move node to add bigram entry. 
const int newNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { + if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) { return false; } int writingPos = newNodePos; // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), - mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), - &writingPos)) { + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(), + ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(), + ptNodeParams.getProbability(), &writingPos)) { return false; } - nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); - if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { + const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos)); + if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { // Insert a new bigram entry into the existing bigram list. - int bigramListPos = nodeReader.getBigramsPos(); + int bigramListPos = newPtNodeParams.getBigramsPos(); return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, outAddedNewBigram); } else { @@ -126,10 +125,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const } // Then, Mark as the PtNode having bigram list in the flags. 
const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), - nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, - nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, - nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), + newPtNodeParams.isNotAWord(), + newPtNodeParams.getProbability() != NOT_A_PROBABILITY, + newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); writingPos = newNodePos; // Write updated flags into the moved PtNode's flags field. return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, @@ -140,11 +140,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const // Remove a bigram relation from word0Pos to word1Pos. 
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); - if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { + const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) { return false; } - return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); + return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos); } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, @@ -181,8 +181,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod } bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( - const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { - int pos = nodeToUpdate->getHeadPos(); + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -194,16 +194,16 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, true /* isDeleted */); - int writingPos = nodeToUpdate->getHeadPos(); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. 
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos); } bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, + const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos) { - int pos = originalNode->getHeadPos(); + int pos = toBeUpdatedPtNodeParams->getHeadPos(); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { @@ -215,7 +215,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const PatriciaTrieReadingUtils::NodeFlags updatedFlags = DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, false /* isDeleted */); - int writingPos = originalNode->getHeadPos(); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, &writingPos)) { @@ -223,31 +223,32 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( } // Update moved position, which is stored in the parent offset field. if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { + mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { return false; } // Update bigram linked node position, which is stored in the children position field. 
- int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { return false; } - if (originalNode->hasChildren()) { + if (toBeUpdatedPtNodeParams->hasChildren()) { // Update children's parent position. - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); - const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); - readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); + readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); while (!readingHelper.isEnd()) { - int parentOffsetFieldPos = nodeReader->getHeadPos() + const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), + mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, // we give up to update dictionary. 
return false; } - readingHelper.readNextSiblingNode(); + readingHelper.readNextSiblingNode(childPtNodeParams); } } return true; @@ -333,13 +334,13 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( BufferWithExtendableBuffer *const bufferToWrite, - const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const PtNodeParams *const originalPtNodeParams, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(), - originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, - originalNode->getChildrenPos(), originalNode->getBigramsPos(), - originalNode->getShortcutPos(), writingPos); + return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(), + originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability, + originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(), + originalPtNodeParams->getShortcutPos(), writingPos); } bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, @@ -355,14 +356,14 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const } bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( - const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, - const int *const codePoints, bool *const outAddedNewUnigram) { - if (originalPtNode->isTerminal()) { + const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram) { + if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. 
*outAddedNewUnigram = false; - const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), - probability); - int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); + const int probabilityToWrite = getUpdatedProbability( + originalPtNodeParams->getProbability(), probability); + int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, probabilityToWrite, &probabilityFieldPos)) { return false; @@ -371,11 +372,12 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( // Make the node terminal and write the probability. *outAddedNewUnigram = true; int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { + if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) { return false; } - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, - originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams, + originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(), + originalPtNodeParams->getCodePointCount(), getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &movedPos)) { return false; @@ -385,15 +387,15 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( } bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( - const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, + const PtNodeParams *const parentPtNodeParams, const int probability, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); - int childrenPosFieldPos = parentNode->getChildrenPosFieldPos(); + int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos(); if 
(!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, newPtNodeArrayPos, &childrenPosFieldPos)) { return false; } - return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, + return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, codePointCount, probability); } @@ -418,8 +420,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( // Returns whether the dictionary updating was succeeded or not. bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( - const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, - const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. @@ -435,8 +436,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; - if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), - reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, + if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(), + reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability, &writingPos)) { return false; } @@ -449,11 +450,11 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( } // Write the 2nd part of the reallocating node. 
const int secondPartOfReallocatedPtNodePos = writingPos; - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, + if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, - reallocatingPtNodeCodePoints + overlappingCodePointCount, - reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, - reallocatingPtNode->getProbability(), &writingPos)) { + reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, + reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, + reallocatingPtNodeParams->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { @@ -468,16 +469,17 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } - // Update original reallocatingPtNode as moved. - if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, + // Update original reallocating PtNode as moved. + if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, secondPartOfReallocatedPtNodePos)) { return false; } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); + const PtNodeParams ptNodeParams( + nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); // Update children position. 
- int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); + int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, actualChildrenPos, &childrenPosFieldPos)) { return false; @@ -488,7 +490,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount) { - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted @@ -530,9 +533,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, mNeedsToDecay); - // Create reading helper for the GCed dictionary. - DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, + // Create reading node reader and reading helper for the GCed dictionary. 
+ DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy, &newDictShortcutPolicy); + DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index ca8664729..5614cb3ac 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h @@ -26,11 +26,12 @@ namespace latinime { class BufferWithExtendableBuffer; class DynamicBigramListPolicy; -class DynamicPatriciaTrieNodeReader; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; class HeaderPolicy; +class PtNodeParams; +// TODO: Make it independent from a particular format and move to pt_common. class DynamicPatriciaTrieWritingHelper { public: typedef hash_map_compat PtNodeArrayPositionRelocationMap; @@ -77,12 +78,12 @@ class DynamicPatriciaTrieWritingHelper { // CAVEAT: This method must be called only from inner classes of // DynamicPatriciaTrieGcEventListeners. - bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); + bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); // CAVEAT: This method must be called only from this class or inner classes of // DynamicPatriciaTrieGcEventListeners. 
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, - const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const PtNodeParams *const originalPtNodeParams, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); @@ -96,7 +97,7 @@ class DynamicPatriciaTrieWritingHelper { DynamicShortcutListPolicy *const mShortcutPolicy; const bool mNeedsToDecay; - bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, + bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, const int bigramLinkedNodePos); bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, @@ -112,19 +113,17 @@ class DynamicPatriciaTrieWritingHelper { bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); - bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, - const int probability, const int *const codePoints, bool *const outAddedNewUnigram); + bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability, + bool *const outAddedNewUnigram); - bool createChildrenPtNodeArrayAndAChildPtNode( - const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, - const int *const codePoints, const int codePointCount); + bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, + const int probability, const int *const codePoints, const int codePointCount); bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability); bool reallocatePtNodeAndAddNewPtNodes( - const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, - const int *const 
reallocatingPtNodeCodePoints, const int overlappingCodePointCount, + const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index aff11f39f..0bfd07b04 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -30,4 +30,6 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = ".shortcut_index_shortcut"; +const int Ver4DictConstants::NOT_A_TERMINAL = -1; + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index a65f11abb..6498ce428 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -34,6 +34,8 @@ class Ver4DictConstants { static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; + static const int NOT_A_TERMINAL; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); };