diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index bc9d57671..178b06554 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -24,6 +24,11 @@ namespace latinime { class BinaryDictionaryBigramsIterator { public: + // Empty iterator. + BinaryDictionaryBigramsIterator() + : mBigramsStructurePolicy(nullptr), mPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(false) {} + BinaryDictionaryBigramsIterator( const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index 105224126..012e4dc9c 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -53,9 +53,8 @@ int MultiBigramMap::getBigramProbability( void MultiBigramMap::BigramMap::init( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); + BinaryDictionaryBigramsIterator bigramsIt = + structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { @@ -89,9 +88,8 @@ int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { int bigramProbability = NOT_A_PROBABILITY; - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); + BinaryDictionaryBigramsIterator bigramsIt = + structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index b72601109..a48d64473 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -20,6 +20,7 @@ #include #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/property/word_property.h" namespace latinime { @@ -61,12 +62,10 @@ class DictionaryStructureWithBufferPolicy { virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; - virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0; + virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0; virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; - virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; - virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; // Returns whether the update was success or not. diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index e350c6996..76276f528 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -92,11 +92,9 @@ class PrevWordsInfo { BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { - const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch( + return getBigramsIteratorForWordWithTryingLowerCaseSearch( dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], mIsBeginningOfSentence[0]); - return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(), - bigramListPos); } // n is 1-indexed. @@ -156,12 +154,12 @@ class PrevWordsInfo { codePoints, codePointCount, true /* forceLowerCaseSearch */); } - static int getBigramListPositionForWordWithTryingLowerCaseSearch( + static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, const bool isBeginningOfSentence) { if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { - return NOT_A_DICT_POS; + return BinaryDictionaryBigramsIterator(); } int codePoints[MAX_WORD_LENGTH]; int codePointCount = wordCodePointCount; @@ -170,30 +168,30 @@ class PrevWordsInfo { codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount, MAX_WORD_LENGTH); if (codePointCount <= 0) { - return NOT_A_DICT_POS; + return BinaryDictionaryBigramsIterator(); } } - int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, - codePointCount, false /* forceLowerCaseSearch */); - // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the - // dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) { - // If no bigrams for this exact word, search again in lower case. - pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, - codePointCount, true /* forceLowerCaseSearch */); + BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy, + codePoints, codePointCount, false /* forceLowerCaseSearch */); + // getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary + // or has no bigrams. + if (bigramsIt.hasNext()) { + return bigramsIt; } - return pos; + // If no bigrams for this exact word, search again in lower case. + return getBigramsIteratorForWord(dictStructurePolicy, codePoints, + codePointCount, true /* forceLowerCaseSearch */); } - static int getBigramListPositionForWord( + static BinaryDictionaryBigramsIterator getBigramsIteratorForWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *wordCodePoints, const int wordCodePointCount, const bool forceLowerCaseSearch) { - if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; + if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator(); const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, forceLowerCaseSearch); - if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; - return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); + if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator(); + return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos); } void clear() { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9780ae048..f478d9b91 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -154,6 +154,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } +BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); +} + int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 16b1bd2c1..6d97c7cc8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -94,16 +94,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutPolicy; } @@ -167,6 +163,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mBigramCount; std::vector mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; }; } // namespace v402 } // namespace backward diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index 002593c49..91d76040f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -304,6 +304,12 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); } +BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition); +} + int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; @@ -322,7 +328,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod int bigramPos = NOT_A_DICT_POS; int siblingPos = NOT_A_DICT_POS; PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), - getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, + &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); // Skip PtNodes don't start with Unicode code point because they represent non-word information. if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { @@ -352,7 +358,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin std::vector bigrams; const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); int bigramWord1CodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); + BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); while (bigramsIt.hasNext()) { // Fetch the next bigram information and forward the iterator. bigramsIt.next(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index ec8407408..7c0b9d3c5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -67,16 +67,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return &mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramListPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutListPolicy; } @@ -158,6 +154,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { std::vector mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + int getBigramsPositionOfPtNode(const int ptNodePos) const; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 46107d92a..0b5764aba 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -144,6 +144,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con ptNodeParams.getTerminalId()); } +BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode( + const int ptNodePos) const { + const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos); + return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition); +} + int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 5d66a2cce..85929b785 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -76,16 +76,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getShortcutPositionOfPtNode(const int ptNodePos) const; - int getBigramsPositionOfPtNode(const int ptNodePos) const; + BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { return mHeaderPolicy; } - const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { - return &mBigramPolicy; - } - const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { return &mShortcutPolicy; } @@ -146,6 +142,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mBigramCount; std::vector mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H