From 7d475003572c9c2902f5918bad524f4ac233e629 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 26 Aug 2014 14:33:19 +0900 Subject: [PATCH] Use word id to construct DicNode instead of isTerminal flag. Bug: 14425059 Change-Id: I8484d34756bd76668ece34211e7366a4758d7bf5 --- native/jni/src/defines.h | 1 + native/jni/src/suggest/core/dicnode/dic_node.h | 4 ++-- .../src/suggest/core/dicnode/dic_node_vector.h | 4 ++-- .../core/dicnode/internal/dic_node_properties.h | 16 ++++++++-------- .../dictionary_structure_with_buffer_policy.h | 1 + .../backward/v402/ver4_patricia_trie_policy.cpp | 3 ++- .../backward/v402/ver4_patricia_trie_policy.h | 1 + .../structure/v2/patricia_trie_policy.cpp | 4 ++-- .../structure/v2/patricia_trie_policy.h | 1 + .../structure/v4/ver4_patricia_trie_policy.cpp | 3 ++- .../structure/v4/ver4_patricia_trie_policy.h | 1 + 11 files changed, 23 insertions(+), 16 deletions(-) diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 24d04e51f..57e18884d 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -299,6 +299,7 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) +#define NOT_A_WORD_ID (S_INT_MIN) #define NOT_A_TIMESTAMP (-1) #define NOT_A_LANGUAGE_WEIGHT (-1.0f) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index d1b2c87be..214cdfca6 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -136,7 +136,7 @@ class DicNode { } void initAsChild(const DicNode *const dicNode, const int ptNodePos, - const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const int childrenPtNodeArrayPos, const int probability, const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { uint16_t newDepth = 
static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); @@ -144,7 +144,7 @@ class DicNode { const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], - probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, + probability, wordId, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); diff --git a/native/jni/src/suggest/core/dicnode/dic_node_vector.h b/native/jni/src/suggest/core/dicnode/dic_node_vector.h index 54cde1988..f01640a93 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_vector.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_vector.h @@ -59,13 +59,13 @@ class DicNodeVector { } void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos, - const int childrenPtNodeArrayPos, const int probability, const bool isTerminal, + const int childrenPtNodeArrayPos, const int probability, const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.emplace_back(); mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability, - isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, + wordId, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); } diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 8202176f7..fc242a92b 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -31,20 +31,20 @@ class DicNodeProperties { 
AK_FORCE_INLINE DicNodeProperties() : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), - mIsTerminal(false), mHasChildrenPtNodes(false), + mWordId(NOT_A_WORD_ID), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, - const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, + const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) { mPtNodePos = pos; mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; mProbability = probability; - mIsTerminal = isTerminal; + mWordId = wordId; mHasChildrenPtNodes = hasChildren; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; @@ -58,7 +58,7 @@ class DicNodeProperties { mChildrenPtNodeArrayPos = rootPtNodeArrayPos; mDicNodeCodePoint = NOT_A_CODE_POINT; mProbability = NOT_A_PROBABILITY; - mIsTerminal = false; + mWordId = NOT_A_WORD_ID; mHasChildrenPtNodes = true; mIsBlacklistedOrNotAWord = false; mDepth = 0; @@ -71,7 +71,7 @@ class DicNodeProperties { mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mProbability = dicNodeProp->mProbability; - mIsTerminal = dicNodeProp->mIsTerminal; + mWordId = dicNodeProp->mWordId; mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; @@ -86,7 +86,7 @@ class DicNodeProperties { mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos; mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child mProbability = dicNodeProp->mProbability; - 
mIsTerminal = dicNodeProp->mIsTerminal; + mWordId = dicNodeProp->mWordId; mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child @@ -121,7 +121,7 @@ class DicNodeProperties { } bool isTerminal() const { - return mIsTerminal; + return mWordId != NOT_A_WORD_ID; } bool hasChildren() const { @@ -144,7 +144,7 @@ class DicNodeProperties { int mChildrenPtNodeArrayPos; int mProbability; int mDicNodeCodePoint; - bool mIsTerminal; + int mWordId; bool mHasChildrenPtNodes; bool mIsBlacklistedOrNotAWord; uint16_t mDepth; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index e91f07682..5052f46cb 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -36,6 +36,7 @@ class UnigramProperty; * This class abstracts the structure of dictionaries. * Implement this policy to support additional dictionaries. */ +// TODO: Use word id instead of terminal PtNode position. class DictionaryStructureWithBufferPolicy { public: typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9c6452e40..c86ae9305 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -76,8 +76,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // Skip PtNodes that represent non-word information. continue; } + const int wordId = isTerminal ? 
ptNodeParams.getHeadPos() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), - ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index d77499636..fac3828c3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -55,6 +55,7 @@ class DicNodeVector; namespace backward { namespace v402 { +// Word id = Position of a PtNode that represents the word. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index ea32eb2a9..aca64b351 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -367,8 +367,8 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); // Skip PtNodes don't start with Unicode code point because they represent non-word information. 
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { - childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, - PatriciaTrieReadingUtils::isTerminal(flags), + const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID; + childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, wordId, PatriciaTrieReadingUtils::hasChildrenInFlags(flags), PatriciaTrieReadingUtils::isBlacklisted(flags) || PatriciaTrieReadingUtils::isNotAWord(flags), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 70351d147..4257b0bf6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -36,6 +36,7 @@ namespace latinime { class DicNode; class DicNodeVector; +// Word id = Position of a PtNode that represents the word. class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 2ea248e86..5eb2d3fe8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -66,8 +66,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // Skip PtNodes that represent non-word information. continue; } + const int wordId = isTerminal ? 
ptNodeParams.getTerminalId() : NOT_A_WORD_ID; childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), - ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index faad4290d..e46803ffe 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -37,6 +37,7 @@ namespace latinime { class DicNode; class DicNodeVector; +// Word id = Artificial id that is stored in the PtNode looked up by the word. class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)