diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 4ff4bc2e4..150eb6762 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -21,7 +21,6 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_proximity_filter.h" #include "suggest/core/dicnode/dic_node_vector.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -33,17 +32,17 @@ namespace latinime { // Node initialization utils // /////////////////////////////// -/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, +/* static */ void DicNodeUtils::initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(), - prevWordNodePos); + newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *const prevWordLastNode, DicNode *const newRootNode) { newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition()); + prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); } /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { @@ -67,12 +66,13 @@ namespace latinime { } /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) { - getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes); + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes) { + getProximityChildDicNodes(dicNode, dictionaryStructurePolicy, 0, 0, false, childDicNodes); } /* static */ void DicNodeUtils::getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { @@ -82,7 +82,7 @@ namespace latinime { if (!dicNode->isLeavingNode()) { DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); } else { - binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, + dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, &childrenFilter, childDicNodes); } } @@ -94,12 +94,13 @@ namespace latinime { * Computes the combined bigram / unigram cost for the given dicNode. */ /* static */ float DicNodeUtils::getBigramNodeImprobability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { return static_cast(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(binaryDictionaryInfo, node, multiBigramMap); + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast(MAX_PROBABILITY - probability) / static_cast(MAX_PROBABILITY); @@ -107,7 +108,7 @@ namespace latinime { } /* static */ int DicNodeUtils::getBigramNodeProbability( - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap) { const int unigramProbability = node->getProbability(); const int wordPos = node->getPos(); @@ -118,8 +119,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), - prevWordPos, wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, + wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 4f12b29f4..8dc984fe1 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -23,10 +23,10 @@ namespace latinime { -class BinaryDictionaryInfo; class DicNode; class DicNodeProximityFilter; class DicNodeVector; +class DictionaryStructureWithBufferPolicy; class ProximityInfoState; class MultiBigramMap; @@ -34,18 +34,22 @@ class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, const int16_t length1, int *dest); - static void initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRoot( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const int prevWordNodePos, DicNode *newRootNode); - static void initAsRootWithPreviousWord(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static void initAsRootWithPreviousWord( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNode *prevWordLastNode, DicNode *newRootNode); static void initByCopy(DicNode *srcNode, DicNode *destNode); static void getAllChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes); - static float getBigramNodeImprobability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, + DicNodeVector *childDicNodes); + static float getBigramNodeImprobability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *const multiBigramMap); // TODO: Move to private static void getProximityChildDicNodes(DicNode *dicNode, - const BinaryDictionaryInfo *const binaryDictionaryInfo, + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const ProximityInfoState *pInfoState, const int pointIndex, bool exactOnly, DicNodeVector *childDicNodes); @@ -54,7 +58,8 @@ class DicNodeUtils { // Max number of bigrams to look up static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; - static int getBigramNodeProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + static int getBigramNodeProbability( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const node, MultiBigramMap *multiBigramMap); static void createAndGetPassingChildNode(DicNode *dicNode, const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index fdaa562e5..ebe76467a 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -22,15 +22,16 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/probability_utils.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "utils/char_utils.h" namespace latinime { -BigramDictionary::BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo) - : mBinaryDictionaryInfo(binaryDictionaryInfo) { +BigramDictionary::BigramDictionary( + const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy) + : mDictionaryStructurePolicy(dictionaryStructurePolicy) { if (DEBUG_DICT) { AKLOGI("BigramDictionary - constructor"); } @@ -112,12 +113,11 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); - const int length = structurePolicy-> + const int length = mDictionaryStructurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the @@ -139,12 +139,10 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; - return structurePolicy->getBigramsPositionOfNode(pos); + return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, @@ -152,13 +150,12 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo->getStructurePolicy(); - int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt( + mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h index 438c34cac..99b964c49 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h @@ -21,11 +21,11 @@ namespace latinime { -class BinaryDictionaryInfo; +class DictionaryStructureWithBufferPolicy; class BigramDictionary { public: - BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); + BigramDictionary(const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy); int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; @@ -40,7 +40,7 @@ class BigramDictionary { int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; - const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const DictionaryStructureWithBufferPolicy *const mDictionaryStructurePolicy; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h index 4d9295229..6b51c326d 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h @@ -24,8 +24,6 @@ namespace latinime { -class BinaryDictionaryInfo; - /** * This class abstracts dictionary header structures and provide interface to access dictionary * header information. diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp index bcf0e612c..dbcad9d66 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -45,9 +45,8 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() - return ByteArrayUtils::readUint32(dictBuf, - VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE - + VERSION_2_HEADER_FLAG_SIZE); + return ByteArrayUtils::readUint32(dictBuf, VERSION_2_HEADER_MAGIC_NUMBER_SIZE + + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + VERSION_2_HEADER_FLAG_SIZE); } /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index 818b2af56..da6b0da8c 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -21,9 +21,7 @@ #include "defines.h" #include "jni.h" -#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "utils/log_utils.h" namespace latinime { @@ -34,18 +32,11 @@ class BinaryDictionaryInfo { const int dictSize, const int mmapFd, const int dictBufOffset, const bool isUpdatable) : mDictBuf(dictBuf), mDictSize(dictSize), mMmapFd(mmapFd), mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), - mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( - mDictBuf, mDictSize)), - mDictionaryHeader(dictBuf), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), - // TODO: Remove. - mStructurePolicy(DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy(this)) { + mDictionaryHeader(dictBuf) { logDictionaryInfo(env); } - ~BinaryDictionaryInfo() { - delete mStructurePolicy; - } + ~BinaryDictionaryInfo() {} AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -63,30 +54,12 @@ class BinaryDictionaryInfo { return mDictBufOffset; } - AK_FORCE_INLINE const uint8_t *getDictRoot() const { - return mDictRoot; - } - - AK_FORCE_INLINE BinaryDictionaryFormatUtils::FORMAT_VERSION getFormat() const { - return mDictionaryFormat; - } - - // TODO: Move to DictionaryStructurePolicy. - AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { - return &mDictionaryHeader; - } - AK_FORCE_INLINE bool isDynamicallyUpdatable() const { // TODO: Support dynamic dictionary formats. const bool isUpdatableDictionaryFormat = false; return mIsUpdatable && isUpdatableDictionaryFormat; } - // TODO: remove - AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const { - return mStructurePolicy; - } - private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo); @@ -95,13 +68,9 @@ class BinaryDictionaryInfo { const int mMmapFd; const int mDictBufOffset; const bool mIsUpdatable; - const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; // TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the - // DictionaryStructurePolicy. + // DictionaryStructureWithBufferPolicy. const BinaryDictionaryHeader mDictionaryHeader; - const uint8_t *const mDictRoot; - // TODO: remove - const DictionaryStructureWithBufferPolicy *const mStructurePolicy; AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { const int BUFFER_SIZE = 16; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 891b80331..55ad8611e 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -18,7 +18,6 @@ #include "suggest/core/dictionary/dictionary.h" -#include // TODO: remove #include #include "defines.h" @@ -27,6 +26,7 @@ #include "suggest/core/session/dic_traverse_session.h" #include "suggest/core/suggest.h" #include "suggest/core/suggest_options.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" @@ -36,7 +36,10 @@ Dictionary::Dictionary(JNIEnv *env, void *dict, int dictSize, int mmapFd, int dictBufOffset, bool isUpdatable) : mBinaryDictionaryInfo(env, static_cast(dict), dictSize, mmapFd, dictBufOffset, isUpdatable), - mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), + mDictionaryStructureWithBufferPolicy(DictionaryStructureWithBufferPolicyFactory + ::newDictionaryStructureWithBufferPolicy( + static_cast(dict), dictSize)), + mBigramDictionary(new BigramDictionary(mDictionaryStructureWithBufferPolicy)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { } @@ -45,6 +48,7 @@ Dictionary::~Dictionary() { delete mBigramDictionary; delete mGestureSuggest; delete mTypingSuggest; + delete mDictionaryStructureWithBufferPolicy; } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, @@ -83,14 +87,12 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - const DictionaryStructureWithBufferPolicy *const structurePolicy = - mBinaryDictionaryInfo.getStructurePolicy(); - int pos = structurePolicy->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == pos) { return NOT_A_PROBABILITY; } - return structurePolicy->getUnigramProbability(pos); + return getDictionaryStructurePolicy()->getUnigramProbability(pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 9f1e0729d..dc07efb0a 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -26,6 +26,7 @@ namespace latinime { class BigramDictionary; +class DictionaryStructureWithBufferPolicy; class DicTraverseSession; class ProximityInfo; class SuggestInterface; @@ -77,16 +78,22 @@ class Dictionary { void removeBigramWords(const int *const word0, const int length0, const int *const word1, const int length1); + // TODO: Remove. const BinaryDictionaryInfo *getBinaryDictionaryInfo() const { return &mBinaryDictionaryInfo; } + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const { + return mDictionaryStructureWithBufferPolicy; + } + virtual ~Dictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); const BinaryDictionaryInfo mBinaryDictionaryInfo; + DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy; const BigramDictionary *mBigramDictionary; SuggestInterface *mGestureSuggest; SuggestInterface *mTypingSuggest; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 6c97067cf..a8a8b2607 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -21,6 +21,7 @@ namespace latinime { +class BinaryDictionaryHeader; class DicNode; class DicNodeVector; class DictionaryBigramsStructurePolicy; @@ -65,6 +66,9 @@ class DictionaryStructureWithBufferPolicy { virtual int getBigramsPositionOfNode(const int nodePos) const = 0; + // TODO: Use policy to access header information. + virtual const BinaryDictionaryHeader *getHeader() const = 0; + virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0; virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index 58729229f..f9b777df2 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -148,7 +148,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n case CT_TERMINAL: { const float languageImprobability = DicNodeUtils::getBigramNodeImprobability( - traverseSession->getBinaryDictionaryInfo(), dicNode, multiBigramMap); + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap); return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability); } case CT_TERMINAL_INSERTION: diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 11a147bda..30a41f0ea 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -19,29 +19,28 @@ #include "defines.h" #include "jni.h" #include "suggest/core/dictionary/binary_dictionary_header.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; - const BinaryDictionaryInfo *const binaryDictionaryInfo = - mDictionary->getBinaryDictionaryInfo(); - mMultiWordCostMultiplier = binaryDictionaryInfo->getHeader()->getMultiWordCostMultiplier(); + mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeader() + ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { mPrevWordPos = NOT_A_VALID_WORD_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); if (mPrevWordPos == NOT_A_VALID_WORD_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } @@ -56,8 +55,9 @@ void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, maxSpatialDistance, maxPointerCount); } -const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const { - return mDictionary->getBinaryDictionaryInfo(); +const DictionaryStructureWithBufferPolicy *DicTraverseSession::getDictionaryStructurePolicy() + const { + return mDictionary->getDictionaryStructurePolicy(); } void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) { diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 5c4cef02d..23de5cc65 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -28,8 +28,8 @@ namespace latinime { -class BinaryDictionaryInfo; class Dictionary; +class DictionaryStructureWithBufferPolicy; class ProximityInfo; class SuggestOptions; @@ -75,8 +75,7 @@ class DicTraverseSession { const int maxPointerCount); void resetCache(const int nextActiveCacheSize, const int maxWords); - // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. - const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; + const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const; //-------------------- // getters and setters diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index bc1f25d3e..5ebf79bfe 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -19,13 +19,12 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" -// TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo. -#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/shortcut_utils.h" #include "suggest/core/layout/proximity_info.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/scoring.h" #include "suggest/core/policy/traversal.h" #include "suggest/core/policy/weighting.h" @@ -108,7 +107,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo MAX_RESULTS); // Create a new dic node here DicNode rootNode; - DicNodeUtils::initAsRoot(traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), traverseSession->getPrevWordPos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } @@ -212,11 +211,10 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen } if (!terminalDicNode->hasMultipleWords()) { - const DictionaryStructureWithBufferPolicy *const structurePolicy = - traverseSession->getBinaryDictionaryInfo()->getStructurePolicy(); BinaryDictionaryShortcutIterator shortcutIt( - structurePolicy->getShortcutsStructurePolicy(), - structurePolicy->getShortcutPositionOfNode(terminalDicNode->getPos())); + traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), + traverseSession->getDictionaryStructurePolicy() + ->getShortcutPositionOfNode(terminalDicNode->getPos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); @@ -299,7 +297,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } DicNodeUtils::getAllChildDicNodes( - &dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + &dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int childDicNodesSize = childDicNodes.getSizeAndLock(); for (int i = 0; i < childDicNodesSize; ++i) { @@ -310,7 +308,7 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { continue; } if (DigraphUtils::hasDigraphForCodePoint( - traverseSession->getBinaryDictionaryInfo()->getHeader(), + traverseSession->getDictionaryStructurePolicy()->getHeader(), childDicNode->getNodeCodePoint())) { correctionDicNode.initByCopy(childDicNode); correctionDicNode.advanceDigraphIndex(); @@ -448,7 +446,7 @@ void Suggest::processDicNodeAsOmission( DicTraverseSession *traverseSession, DicNode *dicNode) const { DicNodeVector childDicNodes; DicNodeUtils::getAllChildDicNodes( - dicNode, traverseSession->getBinaryDictionaryInfo(), &childDicNodes); + dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -473,7 +471,8 @@ void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, true, &childDicNodes); const int size = childDicNodes.getSizeAndLock(); for (int i = 0; i < size; i++) { @@ -491,14 +490,15 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int16_t pointIndex = dicNode->getInputIndex(0); DicNodeVector childDicNodes1; - DicNodeUtils::getProximityChildDicNodes(dicNode, traverseSession->getBinaryDictionaryInfo(), + DicNodeUtils::getProximityChildDicNodes(dicNode, + traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex + 1, false, &childDicNodes1); const int childSize1 = childDicNodes1.getSizeAndLock(); for (int i = 0; i < childSize1; i++) { if (childDicNodes1[i]->hasChildren()) { DicNodeVector childDicNodes2; DicNodeUtils::getProximityChildDicNodes( - childDicNodes1[i], traverseSession->getBinaryDictionaryInfo(), + childDicNodes1[i], traverseSession->getDictionaryStructurePolicy(), traverseSession->getProximityInfoState(0), pointIndex, false, &childDicNodes2); const int childSize2 = childDicNodes2.getSizeAndLock(); for (int j = 0; j < childSize2; j++) { @@ -539,7 +539,7 @@ void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode // Create a non-cached node here. DicNode newDicNode; DicNodeUtils::initAsRootWithPreviousWord( - traverseSession->getBinaryDictionaryInfo(), dicNode, &newDicNode); + traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode); const CorrectionType correctionType = spaceSubstitution ? CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION; Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode, diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp index 324992a48..3054e4ea6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.cpp @@ -16,21 +16,23 @@ #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h" +#include + #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_policy.h" namespace latinime { /* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory - ::newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { + ::newDictionaryStructureWithBufferPolicy(const uint8_t *const dictBuf, + const int dictSize) { + switch (BinaryDictionaryFormatUtils::detectFormatVersion(dictBuf, dictSize)) { case BinaryDictionaryFormatUtils::VERSION_2: - return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + return new PatriciaTriePolicy(dictBuf); case BinaryDictionaryFormatUtils::VERSION_3: - return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot()); + return new DynamicPatriciaTriePolicy(dictBuf); default: ASSERT(false); return 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h index 95f82aabe..53eb8f927 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h @@ -17,18 +17,17 @@ #ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H #define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H -#include "defines.h" +#include +#include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { -class BinaryDictionaryInfo; - class DictionaryStructureWithBufferPolicyFactory { public: - static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy( - const BinaryDictionaryInfo *const binaryDictionaryInfo); + static DictionaryStructureWithBufferPolicy *newDictionaryStructureWithBufferPolicy( + const uint8_t *const dictBuf, const int dictSize); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index caca36977..490c62f91 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -20,6 +20,7 @@ #include #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" @@ -31,8 +32,9 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - DynamicPatriciaTriePolicy(const uint8_t *const dictRoot) - : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + DynamicPatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeader(dictBuf), mDictRoot(dictBuf + mHeader.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~DynamicPatriciaTriePolicy() {} @@ -56,6 +58,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + // TODO: Remove and use policy to access header information. + const BinaryDictionaryHeader *getHeader() const { + return &mHeader; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } @@ -68,6 +75,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; + const BinaryDictionaryHeader mHeader; // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 51b5b9af9..ee51a7e05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -20,6 +20,7 @@ #include #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" @@ -31,8 +32,9 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: - PatriciaTriePolicy(const uint8_t *const dictRoot) - : mDictRoot(dictRoot), mBigramListPolicy(dictRoot), mShortcutListPolicy(dictRoot) {} + PatriciaTriePolicy(const uint8_t *const dictBuf) + : mHeader(dictBuf), mDictRoot(dictBuf + mHeader.getSize()), + mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} ~PatriciaTriePolicy() {} @@ -56,6 +58,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getBigramsPositionOfNode(const int nodePos) const; + // TODO: Remove and use policy to access header information. + const BinaryDictionaryHeader *getHeader() const { + return &mHeader; + } + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { return &mBigramListPolicy; } @@ -67,6 +74,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); + const BinaryDictionaryHeader mHeader; const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index 7cddb0882..b6aa85896 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -155,7 +155,8 @@ class TypingWeighting : public Weighting { float getNewWordBigramLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) const { - return DicNodeUtils::getBigramNodeImprobability(traverseSession->getBinaryDictionaryInfo(), + return DicNodeUtils::getBigramNodeImprobability( + traverseSession->getDictionaryStructurePolicy(), dicNode, multiBigramMap) * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; }