From 6c4d09e9e12d02aa87b27def6529220c93ff4588 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 27 Aug 2013 18:06:42 +0900 Subject: [PATCH] Use extendable buffer for reading node info. Bug: 6669677 Change-Id: I78ba80100e3a38f2b49e43db1e6aef4e56ed062c --- .../dynamic_patricia_trie_node_reader.cpp | 24 +++++++++++++------ .../dynamic_patricia_trie_node_reader.h | 11 ++++++--- .../dynamic_patricia_trie_policy.cpp | 24 +++++++++---------- .../dictionary/dynamic_patricia_trie_policy.h | 13 ++++++---- 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 77a85c86d..c427ebe2d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -19,34 +19,44 @@ #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { - int pos = nodePos; - mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); + const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize; + const uint8_t *const dictBuf = + usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot; + int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos; + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); + DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( - mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); + dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( - mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); + dictBuf, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { - mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( - mDictRoot, mFlags, &pos); + dictBuf, mFlags, &pos); + if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { + mChildrenPos += mOriginalDictSize; + } } else { mChildrenPos = NOT_A_DICT_POS; } + if (usesAdditionalBuffer) { + pos += mOriginalDictSize; + } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; mShortcutsPolicy->skipAllShortcuts(&pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index e990809e8..2a636289e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -27,6 +27,7 @@ namespace latinime { class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; +class ExtendableBuffer; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved @@ -34,12 +35,14 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieNodeReader { public: - DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, + DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize, + const ExtendableBuffer *const extendableBuffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) - : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), + : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize), + mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), - mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), + mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} @@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader { // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; + const int mOriginalDictSize; + const ExtendableBuffer *const mExtendableBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 89e0cd4f5..b244dd0b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -33,8 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; @@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -124,8 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); while (currentLength < length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; @@ -198,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -211,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -224,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), - getShortcutsStructurePolicy()); + DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, + getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 0e5920f42..73b963212 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" -#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), - mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} + mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer), + mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; @@ -93,8 +95,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // TODO: Consolidate mDictRoot. // CAVEAT!: Be careful about array out of bound access with mDictRoot const uint8_t *const mDictRoot; - const BigramListPolicy mBigramListPolicy; - const ShortcutListPolicy mShortcutListPolicy; + const int mOriginalDictSize; + const DynamicBigramListPolicy mBigramListPolicy; + const DynamicShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H