From fa7db65dec4b5d69c1565f114f18084d0d4eb5ec Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 19 May 2014 11:47:10 +0900 Subject: [PATCH] Support multiple previous words in DicNode. Bug: 14425059 Change-Id: Ib8682befe4d7d9fe5122eb538e7c804f75ded463 --- .../jni/src/suggest/core/dicnode/dic_node.h | 26 ++++++++++++------- .../suggest/core/dicnode/dic_node_utils.cpp | 6 ++--- .../src/suggest/core/dicnode/dic_node_utils.h | 2 +- .../dicnode/internal/dic_node_properties.h | 25 +++++++++--------- .../core/session/dic_traverse_session.h | 2 +- native/jni/src/suggest/core/suggest.cpp | 2 +- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 47f5ec0d7..e69d2c46b 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -103,10 +103,10 @@ class DicNode { PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } - // Init for root with prevWordPtNodePos which is used for bigram - void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { + // Init for root with prevWordsPtNodePos which is used for n-gram + void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) { mIsCachedForNextSuggestion = false; - mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos); + mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos); mDicNodeState.init(); PROF_NODE_RESET(mProfiler); } @@ -114,7 +114,12 @@ class DicNode { // Init for root with previous word void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; - mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos()); + int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos(); + for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) { + newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i); + } + mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, dicNode->mDicNodeProperties.getDepth()); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -140,7 +145,7 @@ class DicNode { dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, - newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos()); + newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -198,14 +203,17 @@ class DicNode { return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1; } - // Used to get bigram probability in DicNodeUtils + // Used to get n-gram probability in DicNodeUtils int getPtNodePos() const { return mDicNodeProperties.getPtNodePos(); } - // Used to get bigram probability in DicNodeUtils - int getPrevWordTerminalPtNodePos() const { - return mDicNodeProperties.getPrevWordTerminalPtNodePos(); + // Used to get n-gram probability in DicNodeUtils + int getNthPrevWordTerminalPtNodePos(const int n) const { + if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { + return NOT_A_DICT_POS; + } + return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1]; } // Used in DicNodeUtils diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 2d02a7d9c..bf2a0000d 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -29,8 +29,8 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordPtNodePos, DicNode *const newRootDicNode) { - newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); + const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) { + newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( @@ -86,7 +86,7 @@ namespace latinime { const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { const int unigramProbability = dicNode->getProbability(); const int ptNodePos = dicNode->getPtNodePos(); - const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); + const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */); if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 4c0f1f15d..0d60e5796 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -30,7 +30,7 @@ class DicNodeUtils { public: static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int prevWordPtNodePos, DicNode *const newRootDicNode); + const int *const prevWordPtNodePos, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 11f8c2905..8202176f7 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -32,15 +32,14 @@ class DicNodeProperties { : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), mIsTerminal(false), mHasChildrenPtNodes(false), - mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0), - mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {} + mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, - const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) { + const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) { mPtNodePos = pos; mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; @@ -50,11 +49,11 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; - mPrevWordTerminalPtNodePos = prevWordNodePos; + memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos)); } - // Init for root with prevWordPtNodePos which is used for bigram - void init(const int rootPtNodeArrayPos, const int prevWordNodePos) { + // Init for root with prevWordsPtNodePos which is used for n-gram + void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) { mPtNodePos = NOT_A_DICT_POS; mChildrenPtNodeArrayPos = rootPtNodeArrayPos; mDicNodeCodePoint = NOT_A_CODE_POINT; @@ -64,7 +63,7 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = false; mDepth = 0; mLeavingDepth = 0; - mPrevWordTerminalPtNodePos = prevWordNodePos; + memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos)); } void initByCopy(const DicNodeProperties *const dicNodeProp) { @@ -77,7 +76,8 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; - mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; + memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos, + sizeof(mPrevWordsTerminalPtNodePos)); } // Init as passing child @@ -91,7 +91,8 @@ class DicNodeProperties { mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; - mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; + memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos, + sizeof(mPrevWordsTerminalPtNodePos)); } int getPtNodePos() const { @@ -131,8 +132,8 @@ class DicNodeProperties { return mIsBlacklistedOrNotAWord; } - int getPrevWordTerminalPtNodePos() const { - return mPrevWordTerminalPtNodePos; + const int *getPrevWordsTerminalPtNodePos() const { + return mPrevWordsTerminalPtNodePos; } private: @@ -148,7 +149,7 @@ class DicNodeProperties { bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; - int mPrevWordTerminalPtNodePos; + int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; }; } // namespace latinime #endif // LATINIME_DIC_NODE_PROPERTIES_H diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 90aff06c3..5a51a112d 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -79,7 +79,7 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; } + const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index e675e0bb3..0cd305f5a 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -92,7 +92,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const { // Create a new dic node here DicNode rootNode; DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), - traverseSession->getPrevWordPtNodePos(), &rootNode); + traverseSession->getPrevWordsPtNodePos(), &rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); } }