diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 49cfdecac..c319a38d5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -99,7 +99,7 @@ class DicNode { virtual ~DicNode() {} // Init for copy - void initByCopy(const DicNode *dicNode) { + void initByCopy(const DicNode *const dicNode) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init(&dicNode->mDicNodeProperties); @@ -107,25 +107,25 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordNodePos which is used for bigram - void initAsRoot(const int rootGroupPos, const int prevWordNodePos) { + // Init for root with prevWordPtNodePos which is used for bigram + void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); - mDicNodeState.init(prevWordNodePos); + mDicNodeState.init(prevWordPtNodePos); PROF_NODE_RESET(mProfiler); } // Init for root with previous word - void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) { + void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */, + NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, false /* isTerminal */, true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */, 0 /* terminalDepth */); @@ -138,7 +138,7 @@ class DicNode { mDicNodeState.mDicNodeStatePrevWord.init( dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1, dicNode->mDicNodeProperties.getProbability(), - dicNode->mDicNodeProperties.getPos(), + dicNode->mDicNodeProperties.getPtNodePos(), dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord, dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), dicNode->getOutputWordBuf(), @@ -148,26 +148,27 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - void initAsPassingChild(DicNode *parentNode) { + void initAsPassingChild(DicNode *parentDicNode) { mIsUsed = true; - mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion; - const int c = parentNode->getNodeTypedCodePoint(); - mDicNodeProperties.init(&parentNode->mDicNodeProperties, c); - mDicNodeState.init(&parentNode->mDicNodeState); - PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); + mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion; + const int parentCodePoint = parentDicNode->getNodeTypedCodePoint(); + mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint); + mDicNodeState.init(&parentDicNode->mDicNodeState); + PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler); } - void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void initAsChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); - mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth); + mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + newLeavingDepth); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -234,7 +235,7 @@ class DicNode { } bool isFirstWord() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS; + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS; } bool isCompletion(const int inputSize) const { @@ -246,29 +247,30 @@ class DicNode { } // Used to get bigram probability in DicNodeUtils - int getPos() const { - return mDicNodeProperties.getPos(); + int getPtNodePos() const { + return mDicNodeProperties.getPtNodePos(); } // Used to get bigram probability in DicNodeUtils - int getPrevWordPos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordTerminalPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } // Used in DicNodeUtils - int getChildrenPos() const { - return mDicNodeProperties.getChildrenPos(); + int getChildrenPtNodeArrayPos() const { + return mDicNodeProperties.getChildrenPtNodeArrayPos(); } int getProbability() const { return mDicNodeProperties.getProbability(); } - AK_FORCE_INLINE bool isTerminalWordNode() const { - const bool isTerminalNodes = mDicNodeProperties.isTerminal(); - const int currentNodeDepth = getNodeCodePointCount(); - const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth(); - return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth; + AK_FORCE_INLINE bool isTerminalDicNode() const { + const bool isTerminalPtNode = mDicNodeProperties.isTerminal(); + const int currentDicNodeDepth = getNodeCodePointCount(); + const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth(); + return isTerminalPtNode && currentDicNodeDepth > 0 + && currentDicNodeDepth == terminalDicNodeDepth; } bool shouldBeFilteredBySafetyNetForBigram() const { @@ -374,8 +376,8 @@ class DicNode { } // Used to commit input partially - int getPrevWordNodePos() const { - return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos(); + int getPrevWordPtNodePos() const { + return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos(); } AK_FORCE_INLINE const int *getOutputWordBuf() const { @@ -410,7 +412,7 @@ class DicNode { // TODO: Remove once touch path is merged into ProximityInfoState // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph. int getNodeCodePoint() const { - const int codePoint = mDicNodeProperties.getNodeCodePoint(); + const int codePoint = mDicNodeProperties.getDicNodeCodePoint(); const DigraphUtils::DigraphCodePointIndex digraphIndex = mDicNodeState.mDicNodeStateScoring.getDigraphIndex(); if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) { @@ -423,8 +425,8 @@ class DicNode { // Utils for cost calculation // //////////////////////////////// AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const { - return mDicNodeProperties.getNodeCodePoint() - == dicNode->mDicNodeProperties.getNodeCodePoint(); + return mDicNodeProperties.getDicNodeCodePoint() + == dicNode->mDicNodeProperties.getDicNodeCodePoint(); } // TODO: remove diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index ec65114c7..5540b6df5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -22,7 +22,6 @@ #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "utils/char_utils.h" namespace latinime { @@ -32,19 +31,20 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); + const int prevWordPtNodePos, DicNode *const newRootDicNode) { + newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *const prevWordLastNode, DicNode *const newRootNode) { - newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) { + newRootDicNode->initAsRootWithPreviousWord( + prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition()); } -/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { - destNode->initByCopy(srcNode); +/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode, + DicNode *const destDicNode) { + destDicNode->initByCopy(srcDicNode); } /////////////////////////////////// @@ -52,14 +52,14 @@ namespace latinime { /////////////////////////////////// /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNodeVector *childDicNodes) { + DicNodeVector *const childDicNodes) { if (dicNode->isTotalInputSizeExceedingLimit()) { return; } if (!dicNode->isLeavingNode()) { childDicNodes->pushPassingChild(dicNode); } else { - dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); + dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes); } } @@ -71,11 +71,11 @@ namespace latinime { */ /* static */ float DicNodeUtils::getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast(MAX_VALUE_FOR_WEIGHTING); } - const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, + const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast(MAX_PROBABILITY - probability) @@ -85,19 +85,19 @@ namespace latinime { /* static */ int DicNodeUtils::getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap) { - const int unigramProbability = node->getProbability(); - const int wordPos = node->getPos(); - const int prevWordPos = node->getPrevWordPos(); - if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { + const int unigramProbability = dicNode->getProbability(); + const int ptNodePos = dicNode->getPtNodePos(); + const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); + if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { - return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, - wordPos, unigramProbability); + return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, + prevWordTerminalPtNodePos, ptNodePos, unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); @@ -109,7 +109,7 @@ namespace latinime { // TODO: Move to char_utils? /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, - const int *const src1, const int16_t length1, int *dest) { + const int *const src1, const int16_t length1, int *const dest) { int actualLength0 = 0; for (int i = 0; i < length0; ++i) { if (src0[i] == 0) { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 3fb351a61..3f1514a52 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -31,20 +31,20 @@ class MultiBigramMap; class DicNodeUtils { public: static int appendTwoWords(const int *src0, const int16_t length0, const int *src1, - const int16_t length1, int *dest); + const int16_t length1, int *const dest); static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordNodePos, DicNode *newRootNode); + const int prevWordPtNodePos, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - DicNode *prevWordLastNode, DicNode *newRootNode); - static void initByCopy(DicNode *srcNode, DicNode *destNode); + const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); + static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode); static void getAllChildDicNodes(DicNode *dicNode, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, DicNodeVector *childDicNodes); static float getBigramNodeImprobability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *const multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); @@ -53,7 +53,7 @@ class DicNodeUtils { static int getBigramNodeProbability( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const DicNode *const node, MultiBigramMap *multiBigramMap); + const DicNode *const dicNode, MultiBigramMap *const multiBigramMap); }; } // namespace latinime #endif // LATINIME_DIC_NODE_UTILS_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 42addae8d..9364e7751 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -62,14 +62,14 @@ class DicNodeVector { mDicNodes.back().initAsPassingChild(dicNode); } - void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos, - const int probability, const bool isTerminal, const bool hasChildren, - const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, - const int *const mergedNodeCodePoints) { + void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, + const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const bool hasChildren, const bool isBlacklistedOrNotAWord, + const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal, - hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, + isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 9e0f62ceb..c41a7243a 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -24,15 +24,14 @@ namespace latinime { /** - * Node for traversing the lexicon trie. + * PtNode information related to the DicNode from the lexicon trie. */ -// TODO: Introduce a dictionary node class which has attribute members required to understand the -// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false), - mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} + : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0), + mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), + mDepth(0), mLeavingDepth(0) {} virtual ~DicNodeProperties() {} @@ -40,57 +39,57 @@ class DicNodeProperties { void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth) { - mPos = pos; - mChildrenPos = childrenPos; - mNodeCodePoint = nodeCodePoint; + mPtNodePos = pos; + mChildrenPtNodeArrayPos = childrenPos; + mDicNodeCodePoint = nodeCodePoint; mProbability = probability; mIsTerminal = isTerminal; - mHasChildren = hasChildren; + mHasChildrenPtNodes = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; } // Init for copy - void init(const DicNodeProperties *const nodeProp) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = nodeProp->mNodeCodePoint; - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth; - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth; + mLeavingDepth = dicNodeProp->mLeavingDepth; } // Init as passing child - void init(const DicNodeProperties *const nodeProp, const int codePoint) { - mPos = nodeProp->mPos; - mChildrenPos = nodeProp->mChildrenPos; - mNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mProbability = nodeProp->mProbability; - mIsTerminal = nodeProp->mIsTerminal; - mHasChildren = nodeProp->mHasChildren; - mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord; - mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child - mLeavingDepth = nodeProp->mLeavingDepth; + void init(const DicNodeProperties *const dicNodeProp, const int codePoint) { + mPtNodePos = dicNodeProp->mPtNodePos; + mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; + mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child + mProbability = dicNodeProp->mProbability; + mIsTerminal = dicNodeProp->mIsTerminal; + mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; + mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; + mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child + mLeavingDepth = dicNodeProp->mLeavingDepth; } - int getPos() const { - return mPos; + int getPtNodePos() const { + return mPtNodePos; } - int getChildrenPos() const { - return mChildrenPos; + int getChildrenPtNodeArrayPos() const { + return mChildrenPtNodeArrayPos; } int getProbability() const { return mProbability; } - int getNodeCodePoint() const { - return mNodeCodePoint; + int getDicNodeCodePoint() const { + return mDicNodeCodePoint; } uint16_t getDepth() const { @@ -107,7 +106,7 @@ class DicNodeProperties { } bool hasChildren() const { - return mHasChildren || mDepth != mLeavingDepth; + return mHasChildrenPtNodes || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { @@ -118,12 +117,12 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - int mPos; - int mChildrenPos; + int mPtNodePos; + int mChildrenPtNodeArrayPos; int mProbability; - int mNodeCodePoint; + int mDicNodeCodePoint; bool mIsTerminal; - bool mHasChildren; + bool mHasChildrenPtNodes; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h index b8986203d..dba57056b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_prevword.h @@ -30,7 +30,7 @@ class DicNodeStatePrevWord { public: AK_FORCE_INLINE DicNodeStatePrevWord() : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0), - mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { + mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) { memset(mPrevWord, 0, sizeof(mPrevWord)); } @@ -41,7 +41,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -50,7 +50,7 @@ class DicNodeStatePrevWord { mPrevWordCount = 0; mPrevWordStart = 0; mPrevWordProbability = -1; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; mSecondWordFirstInputIndex = NOT_AN_INDEX; } @@ -60,7 +60,7 @@ class DicNodeStatePrevWord { mPrevWordCount = prevWord->mPrevWordCount; mPrevWordStart = prevWord->mPrevWordStart; mPrevWordProbability = prevWord->mPrevWordProbability; - mPrevWordNodePos = prevWord->mPrevWordNodePos; + mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos; mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex; memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0])); } @@ -71,7 +71,7 @@ class DicNodeStatePrevWord { const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) { mPrevWordCount = min(prevWordCount, static_cast(MAX_RESULTS)); mPrevWordProbability = prevWordProbability; - mPrevWordNodePos = prevWordNodePos; + mPrevWordPtNodePos = prevWordNodePos; int twoWordsLen = DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord); if (twoWordsLen >= MAX_WORD_LENGTH) { @@ -116,8 +116,8 @@ class DicNodeStatePrevWord { return mPrevWordStart; } - int getPrevWordNodePos() const { - return mPrevWordNodePos; + int getPrevWordPtNodePos() const { + return mPrevWordPtNodePos; } int getPrevWordCodePointAt(const int id) const { @@ -147,7 +147,7 @@ class DicNodeStatePrevWord { int16_t mPrevWordLength; int16_t mPrevWordStart; int16_t mPrevWordProbability; - int mPrevWordNodePos; + int mPrevWordPtNodePos; int mSecondWordFirstInputIndex; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 71f4ef6ea..c2a15a312 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; - int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, + int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos); @@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; - int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1, + int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 59ead1894..264b46056 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq } int Dictionary::getProbability(const int *word, int length) const { - int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length, + int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == pos) { return NOT_A_PROBABILITY; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 41f82049f..610de48ab 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy { virtual int getRootPosition() const = 0; - virtual void createAndGetAllChildNodes(const DicNode *const dicNode, + virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const = 0; virtual int getCodePointsAndProbabilityAndReturnCodePointCount( const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const = 0; - virtual int getTerminalNodePositionOfWord(const int *const inWord, + virtual int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const = 0; virtual int getProbability(const int unigramProbability, diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index 50f2bbd8d..5070491f4 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { - mPrevWordPos = NOT_A_DICT_POS; + mPrevWordPtNodePos = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, false /* forceLowerCaseSearch */); - if (mPrevWordPos == NOT_A_DICT_POS) { + if (mPrevWordPtNodePos == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". - mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord( + mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWord, prevWordLength, true /* forceLowerCaseSearch */); } } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index e0b1c67d9..6e4dda44d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -59,7 +59,7 @@ class DicTraverseSession { } AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache) - : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0), + : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0), mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache), mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1), mMultiWordCostMultiplier(1.0f) { @@ -86,11 +86,9 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPos() const { return mPrevWordPos; } + int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; } // TODO: REMOVE - void setPrevWordPos(int pos) { mPrevWordPos = pos; } - // TODO: Use proper parameter when changed - int getDicRootPos() const { return 0; } + void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -119,26 +117,13 @@ class DicTraverseSession { return true; } - void getSearchKeys(const DicNode *node, std::vector *const outputSearchKeyVector) const { - for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { - if (!mProximityInfoStates[i].isUsed()) { - continue; - } - const int pointerId = node->getInputIndex(i); - const std::vector *const searchKeyVector = - mProximityInfoStates[i].getSearchKeyVector(pointerId); - outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(), - searchKeyVector->end()); - } - } - - ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const { + ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const { ProximityType proximityType = UNRELATED_CHAR; for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) { if (!mProximityInfoStates[i].isUsed()) { continue; } - const int pointerId = node->getInputIndex(i); + const int pointerId = dicNode->getInputIndex(i); proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint); ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR); // TODO: Make this more generic @@ -192,7 +177,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordPos; + int mPrevWordPtNodePos; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 73ccebc88..2eda414f4 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Continue suggestion after partial commit. DicNode *topDicNode = traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint); - traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos()); + traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos()); traverseSession->getDicTraverseCache()->continueSearch(); traverseSession->setPartiallyCommited(); } @@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPos(), &rootNode); + traverseSession->getPrevWordPtNodePos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } } @@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen BinaryDictionaryShortcutIterator shortcutIt( traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy() - ->getShortcutPositionOfPtNode(terminalDicNode->getPos())); + ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos())); // Shortcut is not supported for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions. const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); @@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { } break; case UNRELATED_CHAR: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; default: - // Just drop this node and do nothing. + // Just drop this dicNode and do nothing. break; } } - // Push the node for look-ahead correction + // Push the dicNode for look-ahead correction if (allowsErrorCorrections && canDoLookAheadCorrection) { traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode); } @@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode( if (dicNode->getCompoundDistance() >= static_cast(MAX_VALUE_FOR_WEIGHTING)) { return; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return; } if (dicNode->shouldBeFilteredBySafetyNetForBigram()) { @@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode( /** * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word - * (by the space omission error correction) search path if input dicNode is on a terminal node. + * (by the space omission error correction) search path if input dicNode is on a terminal. */ void Suggest::processExpandedDicNode( DicTraverseSession *traverseSession, DicNode *dicNode) const { @@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession, processExpandedDicNode(traverseSession, childDicNode); } -// Process the node codepoint as a digraph. This means that composite glyphs like the German +// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber". void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, @@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession, /** * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider * matches for all possible next letters. Note that just skipping the current letter without any - * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check + * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check * the possible *next* letters after the omission to better limit search to plausible omissions. * Note that apostrophes are handled as omissions. */ @@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession, } /** - * Weight child node by aligning it to the key + * Weight child dicNode by aligning it to the key */ void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const { const int inputSize = traverseSession->getInputSize(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index a8ea69f3c..1d902d7c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024 const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024; -void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); - readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos()); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); while (!readingHelper.isEnd()) { childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), @@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun return codePointCount; } -int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int searchCodePoints[length]; for (int i = 0; i < length; ++i) { @@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; @@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const AKLOGE("The dictionary is too large to dynamically update."); return false; } - const int word0Pos = getTerminalNodePositionOfWord(word0, length0, + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, false /* forceLowerCaseSearch */); if (word0Pos == NOT_A_DICT_POS) { return false; } - const int word1Pos = getTerminalNodePositionOfWord(word1, length1, + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (word1Pos == NOT_A_DICT_POS) { return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index be97ee1a5..2c722e8ed 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index f108c219f..91794266d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -22,7 +22,7 @@ namespace latinime { // To avoid infinite loop caused by invalid or malicious forward links. const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; -const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; +const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; // Visits all PtNodes in post-order depth first manner. @@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { mReadingState.mPos = NOT_A_DICT_POS; return; } - mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; + mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); if (usesAdditionalBuffer) { mReadingState.mPos -= mBuffer->getOriginalBufferSize(); } - mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( - dictBuf, &mReadingState.mPos); + mReadingState.mRemainingPtNodeCountInThisArray = + PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, + &mReadingState.mPos); if (usesAdditionalBuffer) { mReadingState.mPos += mBuffer->getOriginalBufferSize(); } // Count up nodes and node arrays to avoid infinite loop. - mReadingState.mTotalNodeCount += mReadingState.mNodeCount; - mReadingState.mNodeArrayCount++; - if (mReadingState.mNodeCount < 0 - || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP - || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { + mReadingState.mTotalPtNodeIndexInThisArrayChain += + mReadingState.mRemainingPtNodeCountInThisArray; + mReadingState.mPtNodeArrayIndexInThisArrayChain++; + if (mReadingState.mRemainingPtNodeCountInThisArray < 0 + || mReadingState.mTotalPtNodeIndexInThisArrayChain + > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP + || mReadingState.mPtNodeArrayIndexInThisArrayChain + > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { // Invalid dictionary. AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", - mReadingState.mNodeCount, mReadingState.mTotalNodeCount, - MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount, - MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); + mReadingState.mRemainingPtNodeCountInThisArray, + mReadingState.mTotalPtNodeIndexInThisArrayChain, + MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, + mReadingState.mPtNodeArrayIndexInThisArrayChain, + MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); ASSERT(false); mIsError = true; mReadingState.mPos = NOT_A_DICT_POS; return; } - if (mReadingState.mNodeCount == 0) { + if (mReadingState.mRemainingPtNodeCountInThisArray == 0) { // Empty node array. Try following forward link. followForwardLink(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index a71c06971..90a223934 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodeArrayPos; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear(); nextPtNodeArray(); @@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper { } else { mIsError = false; mReadingState.mPos = ptNodePos; - mReadingState.mNodeCount = 1; - mReadingState.mPrevTotalCodePointCount = 0; - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; + mReadingState.mTotalCodePointCountSinceInitialization = 0; + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingStateStack.clear(); fetchPtNodeInfo(); } @@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper { // Return code point count exclude the last read node's code points. AK_FORCE_INLINE int getPrevTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount; + return mReadingState.mTotalCodePointCountSinceInitialization; } // Return code point count include the last read node's code points. AK_FORCE_INLINE int getTotalCodePointCount() const { - return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); + return mReadingState.mTotalCodePointCountSinceInitialization + + mNodeReader.getCodePointCount(); } AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( @@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE void readNextSiblingNode() { - mReadingState.mNodeCount -= 1; + mReadingState.mRemainingPtNodeCountInThisArray -= 1; mReadingState.mPos = mNodeReader.getSiblingNodePos(); - if (mReadingState.mNodeCount <= 0) { + if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { // All nodes in the current node array have been read. followForwardLink(); if (!isEnd()) { @@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper { // Read the first child node of the current node. AK_FORCE_INLINE void readChildNode() { if (mNodeReader.hasChildren()) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 0; - mReadingState.mNodeArrayCount = 0; + mReadingState.mTotalCodePointCountSinceInitialization += + mNodeReader.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; // Read children node array. @@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper { // Read the parent node of the current node. AK_FORCE_INLINE void readParentNode() { if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { - mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); - mReadingState.mTotalNodeCount = 1; - mReadingState.mNodeArrayCount = 1; - mReadingState.mNodeCount = 1; + mReadingState.mTotalCodePointCountSinceInitialization += + mNodeReader.getCodePointCount(); + mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; + mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; + mReadingState.mRemainingPtNodeCountInThisArray = 1; mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; - mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; + mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; fetchPtNodeInfo(); } else { mReadingState.mPos = NOT_A_DICT_POS; @@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper { } AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { - return mReadingState.mPosOfLastPtNodeArrayHead; + return mReadingState.mPosOfThisPtNodeArrayHead; } AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { @@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper { private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); - class ReadingState { + // This class encapsulates the reading state of a position in the dictionary. It points at a + // specific PtNode in the dictionary. + class PtNodeReadingState { public: // Note that copy constructor and assignment operator are used for this class to use // std::vector. - ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), - mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), - mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {} + PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0), + mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0), + mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), + mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {} int mPos; - // Node count of a node array. - int mNodeCount; - int mPrevTotalCodePointCount; - int mTotalNodeCount; - int mNodeArrayCount; + // Remaining node count in the current array. + int mRemainingPtNodeCountInThisArray; + int mTotalCodePointCountSinceInitialization; + // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. + int mTotalPtNodeIndexInThisArrayChain; + // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty + // PtNode arrays. + int mPtNodeArrayIndexInThisArrayChain; int mPosOfLastForwardLinkField; - int mPosOfLastPtNodeArrayHead; + int mPosOfThisPtNodeArrayHead; }; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; - static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; + static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; static const size_t MAX_READING_STATE_STACK_SIZE; // TODO: Introduce error code to track what caused the error. bool mIsError; - ReadingState mReadingState; + PtNodeReadingState mReadingState; const BufferWithExtendableBuffer *const mBuffer; DynamicPatriciaTrieNodeReader mNodeReader; int mMergedNodeCodePoints[MAX_WORD_LENGTH]; - std::vector mReadingStateStack; + std::vector mReadingStateStack; void nextPtNodeArray(); diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 8a84bd261..9921e4bba 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -25,12 +25,12 @@ namespace latinime { -void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, +void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { if (!dicNode->hasChildren()) { return; } - int nextPos = dicNode->getChildrenPos(); + int nextPos = dicNode->getChildrenPtNodeArrayPos(); if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", nextPos, mDictBufferSize); @@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, // This retrieves code points and the probability of the word by its terminal position. // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, -// it is possible to check for this with advantageous complexity. For each node, we search +// it is possible to check for this with advantageous complexity. For each PtNode array, we search // for PtNodes with children and compare the children position with the position we look for. // When we shoot the position we look for, it means the word we look for is in the children // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a // PtNode array with the last PtNode's children position still less than what we are searching for, // we must descend the last PtNode's children (for example, if the word we are searching for starts // with a z, it's the last PtNode of the root array, so all children addresses will be smaller -// than the position we look for, and we have to descend the z node). +// than the position we look for, and we have to descend the z PtNode). /* Parameters : * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is * what is stored as the "bigram position" in each bigram) @@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int pos = getRootPosition(); int wordPos = 0; // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will - // only traverse nodes that are actually a part of the terminal we are searching, so each time - // we enter this loop we are one depth level further than last time. - // The only reason we count nodes is because we want to reduce the probability of infinite + // only traverse PtNodes that are actually a part of the terminal we are searching, so each + // time we enter this loop we are one depth level further than last time. + // The only reason we count PtNodes is because we want to reduce the probability of infinite // looping in case there is a bug. Since we know there is an upper bound to the depth we are // supposed to traverse, it does not hurt to count iterations. for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { @@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = true; } else if (1 >= ptNodeCount) { // However if we are on the LAST PtNode of this array, and we have NOT shot the - // position we should descend THIS node. So we trick the lastCandidatePtNodePos - // so that we will descend this PtNode, not the previous one. + // position we should descend THIS PtNode. So we trick the + // lastCandidatePtNodePos so that we will descend this PtNode, not the previous + // one. lastCandidatePtNodePos = startPos; found = true; } else { @@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( found = false; } } else { - // Even if we don't have children here, we could still be on the last PtNode of / + // Even if we don't have children here, we could still be on the last PtNode of // this array. If this is the case, we should descend the last PtNode that had // children, and their position is already in lastCandidatePtNodePos. found = (1 >= ptNodeCount); @@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( return 0; } -// This function gets the position of the terminal node of the exact matching word in the +// This function gets the position of the terminal PtNode of the exact matching word in the // dictionary. If no match is found, it returns NOT_A_DICT_POS. -int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord, +int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { int pos = getRootPosition(); int wordPos = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 0f8662aea..4b4c39dfa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return 0; } - void createAndGetAllChildNodes(const DicNode *const dicNode, + void createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const; int getCodePointsAndProbabilityAndReturnCodePointCount( const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const; - int getTerminalNodePositionOfWord(const int *const inWord, + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const; int getProbability(const int unigramProbability, const int bigramProbability) const; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index 007c19e0a..fd0ac9eb6 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -81,7 +81,7 @@ class TypingTraversal : public Traversal { return false; } const int point0Index = dicNode->getInputIndex(0); - return dicNode->isTerminalWordNode() + return dicNode->isTerminalDicNode() && traverseSession->getProximityInfoState(0)-> hasSpaceProximity(point0Index); } @@ -96,7 +96,7 @@ class TypingTraversal : public Traversal { if (dicNode->isCompletion(inputSize)) { return false; } - if (!dicNode->isTerminalWordNode()) { + if (!dicNode->isTerminalDicNode()) { return false; } const int16_t pointIndex = dicNode->getInputIndex(0);