From 3f0a8e3fb0621d9e213fa30e9e52d55812d7eb08 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 24 Oct 2013 18:02:39 -0700 Subject: [PATCH] Refactoring: Move some methods to the reading helper. To handle multiple dictionary formats in helpers. Bug: 11073222 Change-Id: I376bbdea934e13588feca02b9865105742f1a058 --- .../v3/dynamic_patricia_trie_policy.cpp | 84 +------------------ .../dynamic_patricia_trie_reading_helper.cpp | 84 +++++++++++++++++++ .../v3/dynamic_patricia_trie_reading_helper.h | 6 ++ 3 files changed, 93 insertions(+), 81 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index 50882b3e9..205d181e7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -77,95 +77,17 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - // This method traverses parent nodes from the terminal by following parent pointers; thus, - // node code points are stored in the buffer in the reverse order. - int reverseCodePoints[maxCodePointCount]; DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); - // First, read the terminal node and get its probability. readingHelper.initWithPtNodePos(ptNodePos); - - const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams()); - if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) { - // Node at the ptNodePos is not a valid terminal node. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Store terminal node probability. - *outUnigramProbability = terminalPtNodeParams.getProbability(); - // Then, following parent node link to the dictionary root and fetch node code points. - int totalCodePointCount = 0; - while (!readingHelper.isEnd()) { - const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); - totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams); - if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { - // The ptNodePos is not a valid terminal node position in the dictionary. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Store node code points to buffer in the reverse order. - readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, - readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); - // Follow parent node toward the root node. - readingHelper.readParentNode(ptNodeParams); - } - if (readingHelper.isError()) { - // The node position or the dictionary is invalid. - *outUnigramProbability = NOT_A_PROBABILITY; - return 0; - } - // Reverse the stored code points to output them. - for (int i = 0; i < totalCodePointCount; ++i) { - outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; - } - return totalCodePointCount; + return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount, + outCodePoints, outUnigramProbability); } int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - int searchCodePoints[length]; - for (int i = 0; i < length; ++i) { - searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; - } - DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - while (!readingHelper.isEnd()) { - const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams()); - if (!ptNodeParams.isValid()) { - break; - } - const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); - if (readingHelper.getTotalCodePointCount(ptNodeParams) > length - || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */, - searchCodePoints[matchedCodePointCount])) { - // Current node has too many code points or its first code point is different from - // target code point. Skip this node and read the next sibling node. - readingHelper.readNextSiblingNode(ptNodeParams); - continue; - } - // Check following merged node code points. - const int nodeCodePointCount = ptNodeParams.getCodePointCount(); - for (int j = 1; j < nodeCodePointCount; ++j) { - if (!readingHelper.isMatchedCodePoint(ptNodeParams, - j, searchCodePoints[matchedCodePointCount + j])) { - // Different code point is found. The given word is not included in the dictionary. - return NOT_A_DICT_POS; - } - } - // All characters are matched. - if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) { - // Terminal position is found. - return ptNodeParams.getHeadPos(); - } - if (!ptNodeParams.hasChildren()) { - return NOT_A_DICT_POS; - } - // Advance to the children nodes. - readingHelper.readChildNode(ptNodeParams); - } - // If we already traversed the tree further than the word is long, there means - // there was no match (or we would have found it). - return NOT_A_DICT_POS; + return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index 398ff21cf..c3fe03d37 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -19,6 +19,7 @@ #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "utils/char_utils.h" namespace latinime { @@ -168,6 +169,89 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor return !isError(); } +int DynamicPatriciaTrieReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount( + const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) { + // This method traverses parent nodes from the terminal by following parent pointers; thus, + // node code points are stored in the buffer in the reverse order. + int reverseCodePoints[maxCodePointCount]; + const PtNodeParams terminalPtNodeParams(getPtNodeParams()); + // First, read the terminal node and get its probability. + if (!isValidTerminalNode(terminalPtNodeParams)) { + // Node at the ptNodePos is not a valid terminal node. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Store terminal node probability. + *outUnigramProbability = terminalPtNodeParams.getProbability(); + // Then, following parent node link to the dictionary root and fetch node code points. + int totalCodePointCount = 0; + while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); + totalCodePointCount = getTotalCodePointCount(ptNodeParams); + if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { + // The ptNodePos is not a valid terminal node position in the dictionary. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Store node code points to buffer in the reverse order. + fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(), + reverseCodePoints); + // Follow parent node toward the root node. + readParentNode(ptNodeParams); + } + if (isError()) { + // The node position or the dictionary is invalid. + *outUnigramProbability = NOT_A_PROBABILITY; + return 0; + } + // Reverse the stored code points to output them. + for (int i = 0; i < totalCodePointCount; ++i) { + outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; + } + return totalCodePointCount; +} + +int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) { + int searchCodePoints[length]; + for (int i = 0; i < length; ++i) { + searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; + } + while (!isEnd()) { + const PtNodeParams ptNodeParams(getPtNodeParams()); + const int matchedCodePointCount = getPrevTotalCodePointCount(); + if (getTotalCodePointCount(ptNodeParams) > length + || !isMatchedCodePoint(ptNodeParams, 0 /* index */, + searchCodePoints[matchedCodePointCount])) { + // Current node has too many code points or its first code point is different from + // target code point. Skip this node and read the next sibling node. + readNextSiblingNode(ptNodeParams); + continue; + } + // Check following merged node code points. + const int nodeCodePointCount = ptNodeParams.getCodePointCount(); + for (int j = 1; j < nodeCodePointCount; ++j) { + if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) { + // Different code point is found. The given word is not included in the dictionary. + return NOT_A_DICT_POS; + } + } + // All characters are matched. + if (length == getTotalCodePointCount(ptNodeParams)) { + // Terminal position is found. + return ptNodeParams.getHeadPos(); + } + if (!ptNodeParams.hasChildren()) { + return NOT_A_DICT_POS; + } + // Advance to the children nodes. + readChildNode(ptNodeParams); + } + // If we already traversed the tree further than the word is long, there means + // there was no match (or we would have found it). + return NOT_A_DICT_POS; +} + // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h index 1e9218e58..197027313 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h @@ -198,6 +198,12 @@ class DynamicPatriciaTrieReadingHelper { bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( TraversingEventListener *const listener); + int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount, + int *const outCodePoints, int *const outUnigramProbability); + + int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, + const bool forceLowerCaseSearch); + private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);