From 6379a4de29fee7019b32b93bc424eda720e02dcf Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Wed, 26 Jun 2013 20:55:14 +0900 Subject: [PATCH] Reduce the number of arguments required to initialize dic_node. Bug: 6669677 Change-Id: I52249b83f72560d8f5ab028da5cfb5c50f6e40b8 --- .../jni/src/suggest/core/dicnode/dic_node.h | 49 ++++++++---------- .../core/dicnode/dic_node_properties.h | 50 ++++--------------- .../src/suggest/core/dicnode/dic_node_state.h | 9 ++-- .../core/dicnode/dic_node_state_output.h | 40 ++++++++------- .../suggest/core/dicnode/dic_node_utils.cpp | 49 ++++++++---------- .../src/suggest/core/dicnode/dic_node_utils.h | 2 +- .../suggest/core/dicnode/dic_node_vector.h | 13 ++--- 7 files changed, 83 insertions(+), 129 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index c700b01ca..abfe82a37 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -109,12 +109,14 @@ class DicNode { // TODO: minimize arguments by looking binary_format // Init for root with prevWordNodePos which is used for bigram - void initAsRoot(const int pos, const int childrenPos, const int childrenCount, - const int prevWordNodePos) { + void initAsRoot(const int pos, const int childrenPos, const int prevWordNodePos) { mIsUsed = true; mIsCachedForNextSuggestion = false; mDicNodeProperties.init( - pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0); + pos, 0 /* flags */, childrenPos, 0 /* attributesPos */, + NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, + false /* isTerminal */, true /* hasChildren */, 0 /* depth */, + 0 /* terminalDepth */); mDicNodeState.init(prevWordNodePos); PROF_NODE_RESET(mProfiler); } @@ -130,12 +132,14 @@ class DicNode { // TODO: minimize arguments by looking binary_format // Init for root with previous word - void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos, - const int childrenCount) { + void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos) { mIsUsed = true; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mDicNodeProperties.init( - pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0); + pos, 0 /* flags */, childrenPos, 0 /* attributesPos */, + NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */, + false /* isTerminal */, true /* hasChildren */, 0 /* depth */, + 0 /* terminalDepth */); // TODO: Move to dicNodeState? mDicNodeState.mDicNodeStateOutput.init(); // reset for next word mDicNodeState.mDicNodeStateInput.init( @@ -157,19 +161,18 @@ class DicNode { // TODO: minimize arguments by looking binary_format void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos, - const int attributesPos, const int siblingPos, const int nodeCodePoint, - const int childrenCount, const int probability, const int bigramProbability, - const bool isTerminal, const bool hasMultipleChars, const bool hasChildren, - const uint16_t additionalSubwordLength, const int *additionalSubword) { + const int attributesPos, const int probability, const bool isTerminal, + const bool hasChildren, const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { mIsUsed = true; uint16_t newDepth = static_cast(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast( - dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength); - mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint, - childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars, - hasChildren, newDepth, newLeavingDepth); - mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword); + dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); + mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0], + probability, isTerminal, hasChildren, newDepth, newLeavingDepth); + mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, + mergedNodeCodePoints); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); } @@ -193,8 +196,8 @@ class DicNode { } bool isLeavingNode() const { - ASSERT(getNodeCodePointCount() <= getLeavingDepth()); - return getNodeCodePointCount() == getLeavingDepth(); + ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth()); + return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth(); } AK_FORCE_INLINE bool isFirstLetter() const { @@ -256,12 +259,6 @@ class DicNode { return mDicNodeProperties.getChildrenPos(); } - // Used in DicNodeUtils - int getChildrenCount() const { - return mDicNodeProperties.getChildrenCount(); - } - - // Used in DicNodeUtils int getProbability() const { return mDicNodeProperties.getProbability(); } @@ -280,10 +277,6 @@ class DicNode { return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1)); } - uint16_t getLeavingDepth() const { - return mDicNodeProperties.getLeavingDepth(); - } - bool isTotalInputSizeExceedingLimit() const { const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(); const int currentWordDepth = getNodeCodePointCount(); @@ -370,7 +363,7 @@ class DicNode { } AK_FORCE_INLINE const int *getOutputWordBuf() const { - return mDicNodeState.mDicNodeStateOutput.mWordBuf; + return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf; } int getPrevCodePointG(int pointerId) const { diff --git a/native/jni/src/suggest/core/dicnode/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/dic_node_properties.h index d2f87c10b..7e8aa4979 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_properties.h @@ -27,37 +27,31 @@ namespace latinime { /** * Node for traversing the lexicon trie. */ +// TODO: Introduce a dictionary node class which has attribute members required to understand the +// dictionary structure. class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() - : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0), - mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0), - mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false), - mHasChildren(false) { - } + : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0), + mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false), + mHasChildren(false) {} virtual ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos, - const int siblingPos, const int nodeCodePoint, const int childrenCount, - const int probability, const int bigramProbability, const bool isTerminal, - const bool hasMultipleChars, const bool hasChildren, const uint16_t depth, - const uint16_t terminalDepth) { + const int nodeCodePoint, const int probability, const bool isTerminal, + const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) { mPos = pos; mFlags = flags; mChildrenPos = childrenPos; mAttributesPos = attributesPos; - mSiblingPos = siblingPos; mNodeCodePoint = nodeCodePoint; - mChildrenCount = childrenCount; mProbability = probability; - mBigramProbability = bigramProbability; mIsTerminal = isTerminal; - mHasMultipleChars = hasMultipleChars; mHasChildren = hasChildren; mDepth = depth; - mLeavingDepth = terminalDepth; + mLeavingDepth = leavingDepth; } // Init for copy @@ -66,13 +60,9 @@ class DicNodeProperties { mFlags = nodeProp->mFlags; mChildrenPos = nodeProp->mChildrenPos; mAttributesPos = nodeProp->mAttributesPos; - mSiblingPos = nodeProp->mSiblingPos; mNodeCodePoint = nodeProp->mNodeCodePoint; - mChildrenCount = nodeProp->mChildrenCount; mProbability = nodeProp->mProbability; - mBigramProbability = nodeProp->mBigramProbability; mIsTerminal = nodeProp->mIsTerminal; - mHasMultipleChars = nodeProp->mHasMultipleChars; mHasChildren = nodeProp->mHasChildren; mDepth = nodeProp->mDepth; mLeavingDepth = nodeProp->mLeavingDepth; @@ -84,13 +74,9 @@ class DicNodeProperties { mFlags = nodeProp->mFlags; mChildrenPos = nodeProp->mChildrenPos; mAttributesPos = nodeProp->mAttributesPos; - mSiblingPos = nodeProp->mSiblingPos; mNodeCodePoint = codePoint; // Overwrite the node char of a passing child - mChildrenCount = nodeProp->mChildrenCount; mProbability = nodeProp->mProbability; - mBigramProbability = nodeProp->mBigramProbability; mIsTerminal = nodeProp->mIsTerminal; - mHasMultipleChars = nodeProp->mHasMultipleChars; mHasChildren = nodeProp->mHasChildren; mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = nodeProp->mLeavingDepth; @@ -112,10 +98,6 @@ class DicNodeProperties { return mAttributesPos; } - int getChildrenCount() const { - return mChildrenCount; - } - int getProbability() const { return mProbability; } @@ -137,12 +119,8 @@ class DicNodeProperties { return mIsTerminal; } - bool hasMultipleChars() const { - return mHasMultipleChars; - } - bool hasChildren() const { - return mChildrenCount > 0 || mDepth != mLeavingDepth; + return mHasChildren || mDepth != mLeavingDepth; } bool hasBlacklistedOrNotAWordFlag() const { @@ -153,25 +131,15 @@ class DicNodeProperties { // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - - // Not used - int getSiblingPos() const { - return mSiblingPos; - } - int mPos; uint8_t mFlags; int mChildrenPos; int mAttributesPos; - int mSiblingPos; - int mChildrenCount; int mProbability; - int mBigramProbability; // not used for now int mNodeCodePoint; uint16_t mDepth; uint16_t mLeavingDepth; bool mIsTerminal; - bool mHasMultipleChars; bool mHasChildren; }; } // namespace latinime diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state.h b/native/jni/src/suggest/core/dicnode/dic_node_state.h index d35e7d79f..b1b6266f2 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_state.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_state.h @@ -55,11 +55,12 @@ class DicNodeState { mDicNodeStateScoring.init(&src->mDicNodeStateScoring); } - // Init by copy and adding subword - void init(const DicNodeState *const src, const uint16_t additionalSubwordLength, - const int *const additionalSubword) { + // Init by copy and adding merged node code points. + void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { init(src); - mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword); + mDicNodeStateOutput.addMergedNodeCodePoints( + mergedNodeCodePointCount, mergedNodeCodePoints); } private: diff --git a/native/jni/src/suggest/core/dicnode/dic_node_state_output.h b/native/jni/src/suggest/core/dicnode/dic_node_state_output.h index 1d4f50a06..45c7f5cf9 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_state_output.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_state_output.h @@ -26,50 +26,52 @@ namespace latinime { class DicNodeStateOutput { public: - DicNodeStateOutput() : mOutputtedLength(0) { + DicNodeStateOutput() : mOutputtedCodePointCount(0) { init(); } virtual ~DicNodeStateOutput() {} void init() { - mOutputtedLength = 0; - mWordBuf[0] = 0; + mOutputtedCodePointCount = 0; + mCodePointsBuf[0] = 0; } void init(const DicNodeStateOutput *const stateOutput) { - memcpy(mWordBuf, stateOutput->mWordBuf, - stateOutput->mOutputtedLength * sizeof(mWordBuf[0])); - mOutputtedLength = stateOutput->mOutputtedLength; - if (mOutputtedLength < MAX_WORD_LENGTH) { - mWordBuf[mOutputtedLength] = 0; + memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf, + stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0])); + mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount; + if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { + mCodePointsBuf[mOutputtedCodePointCount] = 0; } } - void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) { - if (additionalSubword) { - memcpy(&mWordBuf[mOutputtedLength], additionalSubword, - additionalSubwordLength * sizeof(mWordBuf[0])); - mOutputtedLength = static_cast(mOutputtedLength + additionalSubwordLength); - if (mOutputtedLength < MAX_WORD_LENGTH) { - mWordBuf[mOutputtedLength] = 0; + void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { + if (mergedNodeCodePoints) { + memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints, + mergedNodeCodePointCount * sizeof(mCodePointsBuf[0])); + mOutputtedCodePointCount = static_cast( + mOutputtedCodePointCount + mergedNodeCodePointCount); + if (mOutputtedCodePointCount < MAX_WORD_LENGTH) { + mCodePointsBuf[mOutputtedCodePointCount] = 0; } } } // TODO: Remove - int getCodePointAt(const int id) const { - return mWordBuf[id]; + int getCodePointAt(const int index) const { + return mCodePointsBuf[index]; } // TODO: Move to private - int mWordBuf[MAX_WORD_LENGTH]; + int mCodePointsBuf[MAX_WORD_LENGTH]; private: // Caution!!! // Use a default copy constructor and an assign operator because shallow copies are ok // for this class - uint16_t mOutputtedLength; + uint16_t mOutputtedCodePointCount; }; } // namespace latinime #endif // LATINIME_DIC_NODE_STATE_OUTPUT_H diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index f0f26c72b..5902882ff 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -36,23 +36,17 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int prevWordNodePos, DicNode *const newRootNode) { - int curPos = binaryDictionaryInfo->getRootPosition(); - const int pos = curPos; - const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &curPos); - const int childrenPos = curPos; - newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos); + const int rootPos = binaryDictionaryInfo->getRootPosition(); + const int childrenPos = rootPos; + newRootNode->initAsRoot(rootPos, childrenPos, prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNode *const prevWordLastNode, DicNode *const newRootNode) { - int curPos = binaryDictionaryInfo->getRootPosition(); - const int pos = curPos; - const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &curPos); - const int childrenPos = curPos; - newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount); + const int rootPos = binaryDictionaryInfo->getRootPosition(); + const int childrenPos = rootPos; + newRootNode->initAsRootWithPreviousWord(prevWordLastNode, rootPos, childrenPos); } /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { @@ -76,7 +70,7 @@ namespace latinime { } /* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth, + const BinaryDictionaryInfo *const binaryDictionaryInfo, const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, const std::vector *const codePointsFilter, const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) { @@ -90,11 +84,10 @@ namespace latinime { int codePoint = BinaryFormat::getCodePointAndForwardPointer( binaryDictionaryInfo->getDictRoot(), &pos); ASSERT(NOT_A_CODE_POINT != codePoint); - const int nodeCodePoint = codePoint; // TODO: optimize this - int additionalWordBuf[MAX_WORD_LENGTH]; - uint16_t additionalSubwordLength = 0; - additionalWordBuf[additionalSubwordLength++] = codePoint; + int mergedNodeCodePoints[MAX_WORD_LENGTH]; + uint16_t mergedNodeCodePointCount = 0; + mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint; do { const int nextCodePoint = hasMultipleChars @@ -102,7 +95,7 @@ namespace latinime { binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT; const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint); if (!isLastChar) { - additionalWordBuf[additionalSubwordLength++] = nextCodePoint; + mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint; } codePoint = nextCodePoint; } while (NOT_A_CODE_POINT != codePoint); @@ -116,17 +109,14 @@ namespace latinime { const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes( binaryDictionaryInfo->getDictRoot(), flags, pos); - if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) { + if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) { return siblingPos; } - if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) { + if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) { return siblingPos; } - const int childrenCount = hasChildren ? BinaryFormat::getGroupCountAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &childrenPos) : 0; - childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos, - nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal, - hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf); + childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, + probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints); return siblingPos; } @@ -163,13 +153,16 @@ namespace latinime { const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, const std::vector *const codePointsFilter, const ProximityInfo *const pInfo, DicNodeVector *childDicNodes) { - const int terminalDepth = dicNode->getLeavingDepth(); - const int childCount = dicNode->getChildrenCount(); + if (!dicNode->hasChildren()) { + return; + } int nextPos = dicNode->getChildrenPos(); + const int childCount = BinaryFormat::getGroupCountAndForwardPointer( + binaryDictionaryInfo->getDictRoot(), &nextPos); for (int i = 0; i < childCount; i++) { const int filterSize = codePointsFilter ? codePointsFilter->size() : 0; nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, - terminalDepth, pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo, + pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo, childDicNodes); if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) { // All code points have been found. diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index e198d6181..d526975ce 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -72,7 +72,7 @@ class DicNodeUtils { const std::vector *const codePointsFilter, const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); static int createAndGetLeavingChildNode(DicNode *dicNode, int pos, - const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth, + const BinaryDictionaryInfo *const binaryDictionaryInfo, const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly, const std::vector *const codePointsFilter, const ProximityInfo *const pInfo, DicNodeVector *childDicNodes); diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index e23c411f0..9641cc19c 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -63,16 +63,13 @@ class DicNodeVector { } void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags, - const int childrenPos, const int attributesPos, const int siblingPos, - const int nodeCodePoint, const int childrenCount, const int probability, - const int bigramProbability, const bool isTerminal, const bool hasMultipleChars, - const bool hasChildren, const uint16_t additionalSubwordLength, - const int *additionalSubword) { + const int childrenPos, const int attributesPos, const int probability, + const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount, + const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.push_back(mEmptyNode); - mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos, - nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal, - hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword); + mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability, + isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints); } DicNode *operator[](const int id) {