Merge "Move creating children methods to structurePolicy."

This commit is contained in:
Keisuke Kuroynagi 2013-07-29 12:44:01 +00:00 committed by Android (Google) Code Review
commit 9118467b86
6 changed files with 67 additions and 76 deletions

View file

@ -157,9 +157,10 @@ class DicNode {
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler); PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
} }
void initAsChild(DicNode *dicNode, const int pos, const int childrenPos, const int probability, void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const int probability, const bool isTerminal, const bool hasChildren,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
mIsUsed = true; mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;

View file

@ -22,7 +22,6 @@
#include "suggest/core/dicnode/dic_node_proximity_filter.h" #include "suggest/core/dicnode/dic_node_proximity_filter.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_policy.h" #include "suggest/core/policy/dictionary_structure_policy.h"
@ -67,68 +66,6 @@ namespace latinime {
} }
} }
/**
 * Reads the single node stored at |pos| in the dictionary buffer and, unless |childrenFilter|
 * filters out the node's first code point, pushes it onto |childDicNodes| as a leaving child
 * of |dicNode|.
 *
 * @param dicNode parent node the new child is attached to.
 * @param pos position of the node to read (its flags byte comes first).
 * @param binaryDictionaryInfo supplies the dictionary buffer via getDictRoot().
 * @param childrenFilter consulted with the node's first code point only.
 * @param childDicNodes receives the child when it is not filtered out.
 * @return the position reached after skipping this node's children position and attributes;
 *         the caller passes it back in as |pos| to read the next node of the group.
 */
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const DicNodeProximityFilter *const childrenFilter,
        DicNodeVector *childDicNodes) {
    // Remember where the node starts; |pos| is advanced by the reads below.
    const int nextPos = pos;
    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &pos);
    const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
    const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
    const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);

    const int firstCodePoint = BinaryFormat::getCodePointAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &pos);
    ASSERT(NOT_A_CODE_POINT != firstCodePoint);
    // TODO: optimize this
    int mergedNodeCodePoints[MAX_WORD_LENGTH];
    uint16_t mergedNodeCodePointCount = 0;
    mergedNodeCodePoints[mergedNodeCodePointCount++] = firstCodePoint;
    if (hasMultipleChars) {
        // Consume code points up to the NOT_A_CODE_POINT terminator. Reading continues even
        // when the local buffer is full so that |pos| still ends up past the whole character
        // array; only the store is bounded, which prevents writing beyond
        // mergedNodeCodePoints when a node holds more than MAX_WORD_LENGTH code points.
        for (;;) {
            const int nextCodePoint = BinaryFormat::getCodePointAndForwardPointer(
                    binaryDictionaryInfo->getDictRoot(), &pos);
            if (NOT_A_CODE_POINT == nextCodePoint) {
                break;
            }
            if (mergedNodeCodePointCount < MAX_WORD_LENGTH) {
                mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
            }
        }
    }

    const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
            binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
    pos = BinaryFormat::skipProbability(flags, pos);
    const int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
            binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
    // The sibling position is computed before filtering because the caller needs it either way.
    const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
            binaryDictionaryInfo->getDictRoot(), flags, pos);
    if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
        return siblingPos;
    }
    childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
            hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
    return siblingPos;
}
/**
 * Pushes every child of |dicNode| that passes |childrenFilter| onto |childDicNodes|.
 *
 * Does nothing when |dicNode| reports no children. Otherwise the group count is read at the
 * node's children position and that many nodes are read one after another, each read
 * returning the position of the next one.
 */
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) {
    if (dicNode->hasChildren()) {
        int childPos = dicNode->getChildrenPos();
        // Reading the count also moves |childPos| onto the first child node.
        int remainingChildren = BinaryFormat::getGroupCountAndForwardPointer(
                binaryDictionaryInfo->getDictRoot(), &childPos);
        while (remainingChildren > 0) {
            childPos = createAndGetLeavingChildNode(dicNode, childPos, binaryDictionaryInfo,
                    childrenFilter, childDicNodes);
            --remainingChildren;
        }
    }
}
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) { const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) {
getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes); getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes);
@ -145,8 +82,8 @@ namespace latinime {
if (!dicNode->isLeavingNode()) { if (!dicNode->isLeavingNode()) {
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else { } else {
DicNodeUtils::createAndGetAllLeavingChildNodes( binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
dicNode, binaryDictionaryInfo, &childrenFilter, childDicNodes); binaryDictionaryInfo, &childrenFilter, childDicNodes);
} }
} }

View file

@ -58,12 +58,6 @@ class DicNodeUtils {
const DicNode *const node, MultiBigramMap *multiBigramMap); const DicNode *const node, MultiBigramMap *multiBigramMap);
static void createAndGetPassingChildNode(DicNode *dicNode, static void createAndGetPassingChildNode(DicNode *dicNode,
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes); const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static void createAndGetAllLeavingChildNodes(DicNode *dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H #endif // LATINIME_DIC_NODE_UTILS_H

View file

@ -62,7 +62,7 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode); mDicNodes.back().initAsPassingChild(dicNode);
} }
void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos, void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
const int probability, const bool isTerminal, const bool hasChildren, const int probability, const bool isTerminal, const bool hasChildren,
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) { const int *const mergedNodeCodePoints) {

View file

@ -30,7 +30,16 @@ const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
// TODO: Move children creating methods form DicNodeUtils. if (!dicNode->hasChildren()) {
return;
}
int nextPos = dicNode->getChildrenPos();
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
nodeFilter, childDicNodes);
}
} }
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
@ -83,4 +92,50 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(
binaryDictionaryInfo->getDictRoot(), nodePos); binaryDictionaryInfo->getDictRoot(), nodePos);
} }
/**
 * Reads the single node stored at |pos| in the dictionary buffer and, unless |childrenFilter|
 * filters out the node's first code point, pushes it onto |childDicNodes| as a leaving child
 * of |dicNode|.
 *
 * @param dicNode parent node the new child is attached to.
 * @param pos position of the node to read (its flags byte comes first).
 * @param binaryDictionaryInfo supplies the dictionary buffer via getDictRoot().
 * @param childrenFilter consulted with the node's first code point only.
 * @param childDicNodes receives the child when it is not filtered out.
 * @return the position reached after skipping this node's children position and attributes;
 *         the caller passes it back in as |pos| to read the next node of the group.
 */
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
    // Remember where the node starts; |pos| is advanced by the reads below.
    const int nextPos = pos;
    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &pos);
    const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
    const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
    const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);

    const int firstCodePoint = BinaryFormat::getCodePointAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &pos);
    ASSERT(NOT_A_CODE_POINT != firstCodePoint);
    // TODO: optimize this
    int mergedNodeCodePoints[MAX_WORD_LENGTH];
    uint16_t mergedNodeCodePointCount = 0;
    mergedNodeCodePoints[mergedNodeCodePointCount++] = firstCodePoint;
    if (hasMultipleChars) {
        // Consume code points up to the NOT_A_CODE_POINT terminator. Reading continues even
        // when the local buffer is full so that |pos| still ends up past the whole character
        // array; only the store is bounded, which prevents writing beyond
        // mergedNodeCodePoints when a node holds more than MAX_WORD_LENGTH code points.
        for (;;) {
            const int nextCodePoint = BinaryFormat::getCodePointAndForwardPointer(
                    binaryDictionaryInfo->getDictRoot(), &pos);
            if (NOT_A_CODE_POINT == nextCodePoint) {
                break;
            }
            if (mergedNodeCodePointCount < MAX_WORD_LENGTH) {
                mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
            }
        }
    }

    const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
            binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
    pos = BinaryFormat::skipProbability(flags, pos);
    const int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
            binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
    // The sibling position is computed before filtering because the caller needs it either way.
    const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
            binaryDictionaryInfo->getDictRoot(), flags, pos);
    if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
        return siblingPos;
    }
    childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
            hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
    return siblingPos;
}
} // namespace latinime } // namespace latinime

View file

@ -60,6 +60,10 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
PatriciaTriePolicy() {} PatriciaTriePolicy() {}
~PatriciaTriePolicy() {} ~PatriciaTriePolicy() {}
// Reads the node stored at |pos| and, unless |nodeFilter| filters out its first code point,
// pushes it onto |childDicNodes| as a leaving child of |dicNode|. Returns the position
// reached after skipping this node's children position and attributes, which
// createAndGetAllChildNodes() passes back in as |pos| to read the next node.
int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_PATRICIA_TRIE_POLICY_H #endif // LATINIME_PATRICIA_TRIE_POLICY_H