am 9118467b: Merge "Move creating children methods to structurePolicy."
* commit '9118467b8601c87ae6f55b47ea7becaba8d1f9fb': Move creating children methods to structurePolicy.
main
commit
68d406b00a
|
@ -157,9 +157,10 @@ class DicNode {
|
||||||
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsChild(DicNode *dicNode, const int pos, const int childrenPos, const int probability,
|
void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
||||||
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
const int probability, const bool isTerminal, const bool hasChildren,
|
||||||
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
||||||
|
const int *const mergedNodeCodePoints) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
#include "suggest/core/dicnode/dic_node_proximity_filter.h"
|
#include "suggest/core/dicnode/dic_node_proximity_filter.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
|
||||||
#include "suggest/core/dictionary/multi_bigram_map.h"
|
#include "suggest/core/dictionary/multi_bigram_map.h"
|
||||||
#include "suggest/core/dictionary/probability_utils.h"
|
#include "suggest/core/dictionary/probability_utils.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_policy.h"
|
#include "suggest/core/policy/dictionary_structure_policy.h"
|
||||||
|
@ -67,68 +66,6 @@ namespace latinime {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const DicNodeProximityFilter *const childrenFilter,
|
|
||||||
DicNodeVector *childDicNodes) {
|
|
||||||
int nextPos = pos;
|
|
||||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
|
||||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
|
||||||
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
|
||||||
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
|
||||||
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
|
|
||||||
|
|
||||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
|
||||||
ASSERT(NOT_A_CODE_POINT != codePoint);
|
|
||||||
// TODO: optimize this
|
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
|
||||||
uint16_t mergedNodeCodePointCount = 0;
|
|
||||||
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
|
|
||||||
|
|
||||||
do {
|
|
||||||
const int nextCodePoint = hasMultipleChars
|
|
||||||
? BinaryFormat::getCodePointAndForwardPointer(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
|
|
||||||
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
|
||||||
if (!isLastChar) {
|
|
||||||
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
|
|
||||||
}
|
|
||||||
codePoint = nextCodePoint;
|
|
||||||
} while (NOT_A_CODE_POINT != codePoint);
|
|
||||||
|
|
||||||
const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
|
|
||||||
pos = BinaryFormat::skipProbability(flags, pos);
|
|
||||||
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
|
|
||||||
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
|
||||||
|
|
||||||
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
|
||||||
return siblingPos;
|
|
||||||
}
|
|
||||||
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
|
|
||||||
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
|
|
||||||
return siblingPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::createAndGetAllLeavingChildNodes(DicNode *dicNode,
|
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes) {
|
|
||||||
if (!dicNode->hasChildren()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
int nextPos = dicNode->getChildrenPos();
|
|
||||||
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
|
|
||||||
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
|
||||||
for (int i = 0; i < childCount; i++) {
|
|
||||||
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
|
||||||
childrenFilter, childDicNodes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) {
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNodeVector *childDicNodes) {
|
||||||
getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes);
|
getProximityChildDicNodes(dicNode, binaryDictionaryInfo, 0, 0, false, childDicNodes);
|
||||||
|
@ -145,8 +82,8 @@ namespace latinime {
|
||||||
if (!dicNode->isLeavingNode()) {
|
if (!dicNode->isLeavingNode()) {
|
||||||
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
|
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
|
||||||
} else {
|
} else {
|
||||||
DicNodeUtils::createAndGetAllLeavingChildNodes(
|
binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
|
||||||
dicNode, binaryDictionaryInfo, &childrenFilter, childDicNodes);
|
binaryDictionaryInfo, &childrenFilter, childDicNodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,12 +58,6 @@ class DicNodeUtils {
|
||||||
const DicNode *const node, MultiBigramMap *multiBigramMap);
|
const DicNode *const node, MultiBigramMap *multiBigramMap);
|
||||||
static void createAndGetPassingChildNode(DicNode *dicNode,
|
static void createAndGetPassingChildNode(DicNode *dicNode,
|
||||||
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
|
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
|
||||||
static void createAndGetAllLeavingChildNodes(DicNode *dicNode,
|
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
|
|
||||||
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const DicNodeProximityFilter *const childrenFilter, DicNodeVector *childDicNodes);
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DIC_NODE_UTILS_H
|
#endif // LATINIME_DIC_NODE_UTILS_H
|
||||||
|
|
|
@ -62,7 +62,7 @@ class DicNodeVector {
|
||||||
mDicNodes.back().initAsPassingChild(dicNode);
|
mDicNodes.back().initAsPassingChild(dicNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushLeavingChild(DicNode *dicNode, const int pos, const int childrenPos,
|
void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
||||||
const int probability, const bool isTerminal, const bool hasChildren,
|
const int probability, const bool isTerminal, const bool hasChildren,
|
||||||
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
||||||
const int *const mergedNodeCodePoints) {
|
const int *const mergedNodeCodePoints) {
|
||||||
|
|
|
@ -30,7 +30,16 @@ const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
|
||||||
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
||||||
// TODO: Move children creating methods form DicNodeUtils.
|
if (!dicNode->hasChildren()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int nextPos = dicNode->getChildrenPos();
|
||||||
|
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
||||||
|
for (int i = 0; i < childCount; i++) {
|
||||||
|
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
||||||
|
nodeFilter, childDicNodes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
@ -83,4 +92,50 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(
|
||||||
binaryDictionaryInfo->getDictRoot(), nodePos);
|
binaryDictionaryInfo->getDictRoot(), nodePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
|
||||||
|
const int nextPos = pos;
|
||||||
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), &pos);
|
||||||
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
|
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||||
|
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
||||||
|
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
|
||||||
|
|
||||||
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), &pos);
|
||||||
|
ASSERT(NOT_A_CODE_POINT != codePoint);
|
||||||
|
// TODO: optimize this
|
||||||
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
uint16_t mergedNodeCodePointCount = 0;
|
||||||
|
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
|
||||||
|
|
||||||
|
do {
|
||||||
|
const int nextCodePoint = hasMultipleChars
|
||||||
|
? BinaryFormat::getCodePointAndForwardPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
|
||||||
|
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
||||||
|
if (!isLastChar) {
|
||||||
|
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
|
||||||
|
}
|
||||||
|
codePoint = nextCodePoint;
|
||||||
|
} while (NOT_A_CODE_POINT != codePoint);
|
||||||
|
|
||||||
|
const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
|
||||||
|
pos = BinaryFormat::skipProbability(flags, pos);
|
||||||
|
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
|
||||||
|
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
||||||
|
|
||||||
|
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||||
|
return siblingPos;
|
||||||
|
}
|
||||||
|
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
|
||||||
|
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||||
|
return siblingPos;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -60,6 +60,10 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
|
||||||
|
|
||||||
PatriciaTriePolicy() {}
|
PatriciaTriePolicy() {}
|
||||||
~PatriciaTriePolicy() {}
|
~PatriciaTriePolicy() {}
|
||||||
|
|
||||||
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
Loading…
Reference in New Issue