From 668870be431d17ee4ceb5ce161aee1189063af18 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 9 Aug 2013 20:20:34 +0900 Subject: [PATCH] Move bigrams iteration methods to policy. To support various bigram format. Bug: 6669677 Change-Id: Ifc8c1a855b03cd5a39d97a6e10872ef8ef76475b --- .../suggest/core/dicnode/dic_node_utils.cpp | 4 +- .../core/dictionary/bigram_dictionary.cpp | 22 ++++--- .../binary_dictionary_bigrams_iterator.h | 28 +++------ ...nary_terminal_attributes_reading_utils.cpp | 12 ++-- ...ionary_terminal_attributes_reading_utils.h | 14 ++--- .../core/dictionary/multi_bigram_map.h | 31 +++++----- .../core/dictionary/terminal_attributes.h | 2 +- .../dictionary_bigrams_structure_policy.h | 42 ++++++++++++++ .../dictionary_structure_with_buffer_policy.h | 3 + .../dictionary/bigrams/bigram_list_policy.h | 58 +++++++++++++++++++ .../dynamic_patricia_trie_node_reader.cpp | 4 +- .../dynamic_patricia_trie_node_reader.h | 14 +++-- .../dynamic_patricia_trie_policy.cpp | 12 ++-- .../dictionary/dynamic_patricia_trie_policy.h | 9 ++- .../dictionary/patricia_trie_policy.cpp | 3 +- .../dictionary/patricia_trie_policy.h | 9 ++- 16 files changed, 190 insertions(+), 77 deletions(-) create mode 100644 native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index ec70ed30b..4ff4bc2e4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -118,8 +118,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability( - binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), + prevWordPos, wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index d78493b45..fdaa562e5 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - const int length = mBinaryDictionaryInfo->getStructurePolicy()-> + const int length = structurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the @@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - prevWord, prevWordLength, forceLowerCaseSearch); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos); + return structurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; - int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( - word1, length1, false /* forceLowerCaseSearch */); + const DictionaryStructureWithBufferPolicy *const structurePolicy = + mBinaryDictionaryInfo->getStructurePolicy(); + int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; - BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index 8cbb12998..d16ac47fe 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -18,51 +18,41 @@ #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" -#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" namespace latinime { class BinaryDictionaryBigramsIterator { public: BinaryDictionaryBigramsIterator( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0), - mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {} + const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) + : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), + mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), + mHasNext(pos != NOT_A_DICT_POS) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } AK_FORCE_INLINE void next() { - mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( - mBinaryDictionaryInfo, &mPos); - mBigramPos = - BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( - mBinaryDictionaryInfo, mBigramFlags, &mPos); - mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags); + mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos); } AK_FORCE_INLINE int getProbability() const { - return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( - mBigramFlags); + return mProbability; } AK_FORCE_INLINE int getBigramPos() const { return mBigramPos; } - AK_FORCE_INLINE int getFlags() const { - return mBigramFlags; - } - private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator); - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy; int mPos; - BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags; int mBigramPos; + int mProbability; bool mHasNext; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp index 20b77b3b2..9e7d7a391 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp @@ -16,7 +16,6 @@ #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { @@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; /* static */ int TaUtils::getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, + const uint8_t *const dictRoot, const TerminalAttributeFlags flags, int *const pos) { int offset = 0; const int origin = *pos; switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - offset = ByteArrayUtils::readUint8AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - offset = ByteArrayUtils::readUint16AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - offset = ByteArrayUtils::readUint24AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos); break; } if (isOffsetNegative(flags)) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h index 375fc7dff..e1e3e1618 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h @@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils { typedef TerminalAttributeFlags ShortcutFlags; static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - return ByteArrayUtils::readUint8AndAdvancePosition( - binaryDictionaryInfo->getDictRoot(), pos); + const uint8_t *const dictRoot, int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); } static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { @@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils { // Bigrams reading methods static AK_FORCE_INLINE void skipExistingBigrams( - const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); + const uint8_t *const dictRoot, int *const pos) { + BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos); while (hasNext(flags)) { *pos += attributeAddressSize(flags); - flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); + flags = getFlagsAndForwardPointer(dictRoot, pos); } *pos += attributeAddressSize(flags); } static int getBigramAddressAndForwardPointer( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, - int *const pos); + const uint8_t *const dictRoot, const BigramFlags flags, int *const pos); // Shortcuts reading methods // This method returns the size of the shortcut list region excluding the shortcut list size diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 085438008..97d4cd161 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/probability_utils.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/hash_map_compat.h" namespace latinime { @@ -38,7 +38,7 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. - int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability) { hash_map_compat::const_iterator mapPosition = mBigramMaps.find(wordPosition); @@ -46,12 +46,12 @@ class MultiBigramMap { return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(binaryDictionaryInfo, wordPosition); + addBigramsForWordPosition(structurePolicy, wordPosition); return mBigramMaps[wordPosition].getBigramProbability( nextWordPosition, unigramProbability); } - return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo, - wordPosition, nextWordPosition, unigramProbability); + return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + nextWordPosition, unigramProbability); } void clear() { @@ -66,10 +66,11 @@ class MultiBigramMap { BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} ~BigramMap() {} - void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) { - const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); + void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int nodePos) { + const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); @@ -100,16 +101,16 @@ class MultiBigramMap { }; AK_FORCE_INLINE void addBigramsForWordPosition( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) { - mBigramMaps[position].init(binaryDictionaryInfo, position); + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { + mBigramMaps[position].init(structurePolicy, position); } AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { - const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> - getBigramsPositionOfNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); + const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h index 0da6504eb..c40a3bb59 100644 --- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h +++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h @@ -49,7 +49,7 @@ class TerminalAttributes { bool *const outIsWhitelist) { const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( - mBinaryDictionaryInfo, &mPos); + mBinaryDictionaryInfo->getDictRoot(), &mPos); mHasNextShortcutTarget = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); if (outIsWhitelist) { diff --git a/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h new file mode 100644 index 000000000..661ef1b1a --- /dev/null +++ b/native/jni/src/suggest/core/policy/dictionary_bigrams_structure_policy.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H +#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H + +#include "defines.h" + +namespace latinime { + +/* + * This class abstracts structure of bigrams. + */ +class DictionaryBigramsStructurePolicy { + public: + virtual ~DictionaryBigramsStructurePolicy() {} + + virtual void getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const pos) const = 0; + virtual void skipAllBigrams(int *const pos) const = 0; + + protected: + DictionaryBigramsStructurePolicy() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy); +}; +} // namespace latinime +#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index dce4e741a..d83d1e390 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -23,6 +23,7 @@ namespace latinime { class DicNode; class DicNodeVector; +class DictionaryBigramsStructurePolicy; /* * This class abstracts structure of dictionaries. @@ -63,6 +64,8 @@ class DictionaryStructureWithBufferPolicy { virtual int getBigramsPositionOfNode(const int nodePos) const = 0; + virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h new file mode 100644 index 000000000..a9e5da3c7 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_LIST_POLICY_H +#define LATINIME_BIGRAM_LIST_POLICY_H + +#include + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +// TODO: Move bigrams reading methods to policyimpl. +#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" + +namespace latinime { + +class BigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {} + + ~BigramListPolicy() {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, + int *const pos) const { + const BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags flags = + BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( + mBigramsBuf, pos); + *outBigramPos = + BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( + mBigramsBuf, flags, pos); + *outProbability = + BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(flags); + *outHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); + } + + void skipAllBigrams(int *const pos) const { + BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(mBigramsBuf, pos); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy); + + const uint8_t *const mBigramsBuf; +}; +} // namespace latinime +#endif // LATINIME_BIGRAM_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index 7ac635a00..e8799e574 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -18,6 +18,7 @@ #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" namespace latinime { @@ -56,8 +57,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c } if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { mBigramPos = pos; - BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( - mBinaryDictionaryInfo, &pos); + mBigramsPolicy->skipAllBigrams(&pos); } else { mBigramPos = NOT_A_DICT_POS; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h index 71558edaa..641ac9b5b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -17,13 +17,17 @@ #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H +#include + #include "defines.h" +#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { class BinaryDictionaryInfo; +class DictionaryBigramsStructurePolicy; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved @@ -31,10 +35,11 @@ class BinaryDictionaryInfo; */ class DynamicPatriciaTrieNodeReader { public: - explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS), - mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), - mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), + DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryBigramsStructurePolicy *const bigramsPolicy) + : mBinaryDictionaryInfo(binaryDictionaryInfo), mBigramsPolicy(bigramsPolicy), + mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), + mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} @@ -117,6 +122,7 @@ class DynamicPatriciaTrieNodeReader { DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryBigramsStructurePolicy *const mBigramsPolicy; int mNodePos; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; int mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index bb49bb15c..9b384157e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -34,7 +34,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; @@ -79,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); @@ -123,7 +123,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); while (currentLength <= length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; @@ -194,7 +194,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; @@ -206,7 +206,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; @@ -218,7 +218,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } - DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); + DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index e92672128..2386d937f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h" namespace latinime { @@ -32,7 +33,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: DynamicPatriciaTriePolicy(const uint8_t *const dictRoot, const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} + : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo), + mBigramListPolicy(dictRoot) {} ~DynamicPatriciaTriePolicy() {} @@ -56,6 +58,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramListPolicy; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; @@ -63,6 +69,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const uint8_t *const mDictRoot; // TODO: remove const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const BigramListPolicy mBigramListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index fd5f6e7dd..7001509ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -136,8 +136,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); } if (PatriciaTrieReadingUtils::hasBigrams(flags)) { - BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( - mBinaryDictionaryInfo, &pos); + getBigramsStructurePolicy()->skipAllBigrams(&pos); } if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index e1034127c..bebee39fb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h" namespace latinime { @@ -32,7 +33,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(const uint8_t *const dictRoot, const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} + : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo), + mBigramListPolicy(dictRoot) {} ~PatriciaTriePolicy() {} @@ -56,12 +58,17 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramListPolicy; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); const uint8_t *const mDictRoot; // TODO: remove const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const BigramListPolicy mBigramListPolicy; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;