Merge "Make "node"s clear by renaming to PtNode or DicNode."

main
Keisuke Kuroyanagi 2013-10-17 08:23:01 +00:00 committed by Android (Google) Code Review
commit be5e8f18e9
19 changed files with 233 additions and 231 deletions
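For orientation while reading the diff: a "PtNode" is a node of the patricia trie as stored in the dictionary buffer, addressed by an int position, while a "DicNode" is the in-memory node the suggestion engine uses while traversing that trie. The snippet below is an illustrative sketch only; it restates, outside the diff, the post-rename call pattern used by DicNodeUtils::getBigramNodeProbability() in this change. The wrapper function and its name are hypothetical and not part of the commit.

// Illustrative sketch (not part of this change): the renamed DicNode accessors return
// PtNode positions in the dictionary buffer, which are then used for the bigram lookup.
static int getBigramProbabilityForDicNode(
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
    const int unigramProbability = dicNode->getProbability();
    // PtNode position of this word's terminal in the dictionary buffer (was getPos()).
    const int ptNodePos = dicNode->getPtNodePos();
    // PtNode position of the previous word's terminal (was getPrevWordPos()).
    const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
    if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
        // No bigram context available; fall back to the unigram probability.
        return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
    }
    if (multiBigramMap) {
        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
                prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
    }
    return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}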

View File

@@ -99,7 +99,7 @@ class DicNode {
     virtual ~DicNode() {}
     // Init for copy
-    void initByCopy(const DicNode *dicNode) {
+    void initByCopy(const DicNode *const dicNode) {
         mIsUsed = true;
         mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
         mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@@ -107,25 +107,25 @@ class DicNode {
         PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
     }
-    // Init for root with prevWordNodePos which is used for bigram
-    void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
+    // Init for root with prevWordPtNodePos which is used for bigram
+    void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
         mIsUsed = true;
         mIsCachedForNextSuggestion = false;
         mDicNodeProperties.init(
-                NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+                NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
                 NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
                 true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
                 0 /* terminalDepth */);
-        mDicNodeState.init(prevWordNodePos);
+        mDicNodeState.init(prevWordPtNodePos);
         PROF_NODE_RESET(mProfiler);
     }
     // Init for root with previous word
-    void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
+    void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
         mIsUsed = true;
         mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
         mDicNodeProperties.init(
-                NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
+                NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
                 NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
                 true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
                 0 /* terminalDepth */);
@@ -138,7 +138,7 @@ class DicNode {
         mDicNodeState.mDicNodeStatePrevWord.init(
                 dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
                 dicNode->mDicNodeProperties.getProbability(),
-                dicNode->mDicNodeProperties.getPos(),
+                dicNode->mDicNodeProperties.getPtNodePos(),
                 dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
                 dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
                 dicNode->getOutputWordBuf(),
@@ -148,26 +148,27 @@ class DicNode {
         PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
     }
-    void initAsPassingChild(DicNode *parentNode) {
+    void initAsPassingChild(DicNode *parentDicNode) {
         mIsUsed = true;
-        mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
-        const int c = parentNode->getNodeTypedCodePoint();
-        mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
-        mDicNodeState.init(&parentNode->mDicNodeState);
-        PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
+        mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
+        const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
+        mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
+        mDicNodeState.init(&parentDicNode->mDicNodeState);
+        PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
     }
-    void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
-            const int probability, const bool isTerminal, const bool hasChildren,
-            const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
-            const int *const mergedNodeCodePoints) {
+    void initAsChild(const DicNode *const dicNode, const int ptNodePos,
+            const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+            const bool hasChildren, const bool isBlacklistedOrNotAWord,
+            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
         mIsUsed = true;
         uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
         mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
         const uint16_t newLeavingDepth = static_cast<uint16_t>(
                 dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
-        mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
-                isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
+        mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
+                probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
+                newLeavingDepth);
         mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
                 mergedNodeCodePoints);
         PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -234,7 +235,7 @@ class DicNode {
     }
     bool isFirstWord() const {
-        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
+        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
     }
     bool isCompletion(const int inputSize) const {
@@ -246,29 +247,30 @@ class DicNode {
     }
     // Used to get bigram probability in DicNodeUtils
-    int getPos() const {
-        return mDicNodeProperties.getPos();
+    int getPtNodePos() const {
+        return mDicNodeProperties.getPtNodePos();
     }
     // Used to get bigram probability in DicNodeUtils
-    int getPrevWordPos() const {
-        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+    int getPrevWordTerminalPtNodePos() const {
+        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
     }
     // Used in DicNodeUtils
-    int getChildrenPos() const {
-        return mDicNodeProperties.getChildrenPos();
+    int getChildrenPtNodeArrayPos() const {
+        return mDicNodeProperties.getChildrenPtNodeArrayPos();
     }
     int getProbability() const {
         return mDicNodeProperties.getProbability();
     }
-    AK_FORCE_INLINE bool isTerminalWordNode() const {
-        const bool isTerminalNodes = mDicNodeProperties.isTerminal();
-        const int currentNodeDepth = getNodeCodePointCount();
-        const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
-        return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
+    AK_FORCE_INLINE bool isTerminalDicNode() const {
+        const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
+        const int currentDicNodeDepth = getNodeCodePointCount();
+        const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
+        return isTerminalPtNode && currentDicNodeDepth > 0
+                && currentDicNodeDepth == terminalDicNodeDepth;
     }
     bool shouldBeFilteredBySafetyNetForBigram() const {
@@ -374,8 +376,8 @@ class DicNode {
     }
     // Used to commit input partially
-    int getPrevWordNodePos() const {
-        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
+    int getPrevWordPtNodePos() const {
+        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
     }
     AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -410,7 +412,7 @@ class DicNode {
     // TODO: Remove once touch path is merged into ProximityInfoState
     // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
     int getNodeCodePoint() const {
-        const int codePoint = mDicNodeProperties.getNodeCodePoint();
+        const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
         const DigraphUtils::DigraphCodePointIndex digraphIndex =
                 mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
         if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@@ -423,8 +425,8 @@ class DicNode {
     // Utils for cost calculation //
     ////////////////////////////////
     AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
-        return mDicNodeProperties.getNodeCodePoint()
-                == dicNode->mDicNodeProperties.getNodeCodePoint();
+        return mDicNodeProperties.getDicNodeCodePoint()
+                == dicNode->mDicNodeProperties.getDicNodeCodePoint();
     }
     // TODO: remove

View File

@@ -22,7 +22,6 @@
 #include "suggest/core/dicnode/dic_node_vector.h"
 #include "suggest/core/dictionary/multi_bigram_map.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
-#include "utils/char_utils.h"
 namespace latinime {
@@ -32,19 +31,20 @@ namespace latinime {
 /* static */ void DicNodeUtils::initAsRoot(
         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-        const int prevWordNodePos, DicNode *const newRootNode) {
-    newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
+        const int prevWordPtNodePos, DicNode *const newRootDicNode) {
+    newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
 }
 /*static */ void DicNodeUtils::initAsRootWithPreviousWord(
         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-        DicNode *const prevWordLastNode, DicNode *const newRootNode) {
-    newRootNode->initAsRootWithPreviousWord(
-            prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
+        const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
+    newRootDicNode->initAsRootWithPreviousWord(
+            prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
 }
-/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
-    destNode->initByCopy(srcNode);
+/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
+        DicNode *const destDicNode) {
+    destDicNode->initByCopy(srcDicNode);
 }
 ///////////////////////////////////
@@ -52,14 +52,14 @@ namespace latinime {
 ///////////////////////////////////
 /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-        DicNodeVector *childDicNodes) {
+        DicNodeVector *const childDicNodes) {
     if (dicNode->isTotalInputSizeExceedingLimit()) {
         return;
     }
     if (!dicNode->isLeavingNode()) {
         childDicNodes->pushPassingChild(dicNode);
     } else {
-        dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
+        dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
     }
 }
@@ -71,11 +71,11 @@ namespace latinime {
  */
 /* static */ float DicNodeUtils::getBigramNodeImprobability(
         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-        const DicNode *const node, MultiBigramMap *multiBigramMap) {
-    if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
+        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+    if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
         return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
     }
-    const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
+    const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
             multiBigramMap);
     // TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
     const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@@ -85,19 +85,19 @@ namespace latinime {
 /* static */ int DicNodeUtils::getBigramNodeProbability(
         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-        const DicNode *const node, MultiBigramMap *multiBigramMap) {
-    const int unigramProbability = node->getProbability();
-    const int wordPos = node->getPos();
-    const int prevWordPos = node->getPrevWordPos();
-    if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
+        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
+    const int unigramProbability = dicNode->getProbability();
+    const int ptNodePos = dicNode->getPtNodePos();
+    const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
+    if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
         // Note: Normally wordPos comes from the dictionary and should never equal
         // NOT_A_VALID_WORD_POS.
         return dictionaryStructurePolicy->getProbability(unigramProbability,
                 NOT_A_PROBABILITY);
     }
     if (multiBigramMap) {
-        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
-                wordPos, unigramProbability);
+        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
+                prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
     }
     return dictionaryStructurePolicy->getProbability(unigramProbability,
             NOT_A_PROBABILITY);
@@ -109,7 +109,7 @@ namespace latinime {
 // TODO: Move to char_utils?
 /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
-        const int *const src1, const int16_t length1, int *dest) {
+        const int *const src1, const int16_t length1, int *const dest) {
     int actualLength0 = 0;
     for (int i = 0; i < length0; ++i) {
         if (src0[i] == 0) {

View File

@@ -31,20 +31,20 @@ class MultiBigramMap;
 class DicNodeUtils {
  public:
     static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
-            const int16_t length1, int *dest);
+            const int16_t length1, int *const dest);
     static void initAsRoot(
             const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-            const int prevWordNodePos, DicNode *newRootNode);
+            const int prevWordPtNodePos, DicNode *const newRootDicNode);
     static void initAsRootWithPreviousWord(
             const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-            DicNode *prevWordLastNode, DicNode *newRootNode);
-    static void initByCopy(DicNode *srcNode, DicNode *destNode);
+            const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
+    static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
     static void getAllChildDicNodes(DicNode *dicNode,
             const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
             DicNodeVector *childDicNodes);
     static float getBigramNodeImprobability(
             const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-            const DicNode *const node, MultiBigramMap *const multiBigramMap);
+            const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@@ -53,7 +53,7 @@ class DicNodeUtils {
     static int getBigramNodeProbability(
             const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
-            const DicNode *const node, MultiBigramMap *multiBigramMap);
+            const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
 };
 } // namespace latinime
 #endif // LATINIME_DIC_NODE_UTILS_H

View File

@@ -62,14 +62,14 @@ class DicNodeVector {
         mDicNodes.back().initAsPassingChild(dicNode);
     }
-    void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
-            const int probability, const bool isTerminal, const bool hasChildren,
-            const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
-            const int *const mergedNodeCodePoints) {
+    void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
+            const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
+            const bool hasChildren, const bool isBlacklistedOrNotAWord,
+            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
         ASSERT(!mLock);
         mDicNodes.push_back(mEmptyNode);
-        mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
-                hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
+        mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
+                isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
                 mergedNodeCodePoints);
     }

View File

@@ -24,15 +24,14 @@
 namespace latinime {
 /**
- * Node for traversing the lexicon trie.
+ * PtNode information related to the DicNode from the lexicon trie.
  */
-// TODO: Introduce a dictionary node class which has attribute members required to understand the
-// dictionary structure.
 class DicNodeProperties {
  public:
     AK_FORCE_INLINE DicNodeProperties()
-            : mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
-              mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
+            : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
+              mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
+              mDepth(0), mLeavingDepth(0) {}
     virtual ~DicNodeProperties() {}
@@ -40,57 +39,57 @@ class DicNodeProperties {
     void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
             const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
             const uint16_t depth, const uint16_t leavingDepth) {
-        mPos = pos;
-        mChildrenPos = childrenPos;
-        mNodeCodePoint = nodeCodePoint;
+        mPtNodePos = pos;
+        mChildrenPtNodeArrayPos = childrenPos;
+        mDicNodeCodePoint = nodeCodePoint;
         mProbability = probability;
         mIsTerminal = isTerminal;
-        mHasChildren = hasChildren;
+        mHasChildrenPtNodes = hasChildren;
         mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
         mDepth = depth;
         mLeavingDepth = leavingDepth;
     }
     // Init for copy
-    void init(const DicNodeProperties *const nodeProp) {
-        mPos = nodeProp->mPos;
-        mChildrenPos = nodeProp->mChildrenPos;
-        mNodeCodePoint = nodeProp->mNodeCodePoint;
-        mProbability = nodeProp->mProbability;
-        mIsTerminal = nodeProp->mIsTerminal;
-        mHasChildren = nodeProp->mHasChildren;
-        mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
-        mDepth = nodeProp->mDepth;
-        mLeavingDepth = nodeProp->mLeavingDepth;
+    void init(const DicNodeProperties *const dicNodeProp) {
+        mPtNodePos = dicNodeProp->mPtNodePos;
+        mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+        mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
+        mProbability = dicNodeProp->mProbability;
+        mIsTerminal = dicNodeProp->mIsTerminal;
+        mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+        mDepth = dicNodeProp->mDepth;
+        mLeavingDepth = dicNodeProp->mLeavingDepth;
     }
     // Init as passing child
-    void init(const DicNodeProperties *const nodeProp, const int codePoint) {
-        mPos = nodeProp->mPos;
-        mChildrenPos = nodeProp->mChildrenPos;
-        mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
-        mProbability = nodeProp->mProbability;
-        mIsTerminal = nodeProp->mIsTerminal;
-        mHasChildren = nodeProp->mHasChildren;
-        mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
-        mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
-        mLeavingDepth = nodeProp->mLeavingDepth;
+    void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
+        mPtNodePos = dicNodeProp->mPtNodePos;
+        mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
+        mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
+        mProbability = dicNodeProp->mProbability;
+        mIsTerminal = dicNodeProp->mIsTerminal;
+        mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
+        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
+        mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
+        mLeavingDepth = dicNodeProp->mLeavingDepth;
     }
-    int getPos() const {
-        return mPos;
+    int getPtNodePos() const {
+        return mPtNodePos;
     }
-    int getChildrenPos() const {
-        return mChildrenPos;
+    int getChildrenPtNodeArrayPos() const {
+        return mChildrenPtNodeArrayPos;
     }
     int getProbability() const {
         return mProbability;
     }
-    int getNodeCodePoint() const {
-        return mNodeCodePoint;
+    int getDicNodeCodePoint() const {
+        return mDicNodeCodePoint;
     }
     uint16_t getDepth() const {
@@ -107,7 +106,7 @@ class DicNodeProperties {
     }
     bool hasChildren() const {
-        return mHasChildren || mDepth != mLeavingDepth;
+        return mHasChildrenPtNodes || mDepth != mLeavingDepth;
     }
     bool isBlacklistedOrNotAWord() const {
@@ -118,12 +117,12 @@ class DicNodeProperties {
     // Caution!!!
     // Use a default copy constructor and an assign operator because shallow copies are ok
     // for this class
-    int mPos;
-    int mChildrenPos;
+    int mPtNodePos;
+    int mChildrenPtNodeArrayPos;
     int mProbability;
-    int mNodeCodePoint;
+    int mDicNodeCodePoint;
     bool mIsTerminal;
-    bool mHasChildren;
+    bool mHasChildrenPtNodes;
     bool mIsBlacklistedOrNotAWord;
     uint16_t mDepth;
     uint16_t mLeavingDepth;

View File

@@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
  public:
     AK_FORCE_INLINE DicNodeStatePrevWord()
             : mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
-              mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
+              mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
         memset(mPrevWord, 0, sizeof(mPrevWord));
     }
@@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
         mPrevWordCount = 0;
         mPrevWordStart = 0;
         mPrevWordProbability = -1;
-        mPrevWordNodePos = NOT_A_DICT_POS;
+        mPrevWordPtNodePos = NOT_A_DICT_POS;
         mSecondWordFirstInputIndex = NOT_AN_INDEX;
     }
@@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
         mPrevWordCount = 0;
         mPrevWordStart = 0;
         mPrevWordProbability = -1;
-        mPrevWordNodePos = prevWordNodePos;
+        mPrevWordPtNodePos = prevWordNodePos;
         mSecondWordFirstInputIndex = NOT_AN_INDEX;
     }
@@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
         mPrevWordCount = prevWord->mPrevWordCount;
         mPrevWordStart = prevWord->mPrevWordStart;
         mPrevWordProbability = prevWord->mPrevWordProbability;
-        mPrevWordNodePos = prevWord->mPrevWordNodePos;
+        mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
         mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
         memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
     }
@@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
             const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
         mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
         mPrevWordProbability = prevWordProbability;
-        mPrevWordNodePos = prevWordNodePos;
+        mPrevWordPtNodePos = prevWordNodePos;
         int twoWordsLen =
                 DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
         if (twoWordsLen >= MAX_WORD_LENGTH) {
@@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
         return mPrevWordStart;
     }
-    int getPrevWordNodePos() const {
-        return mPrevWordNodePos;
+    int getPrevWordPtNodePos() const {
+        return mPrevWordPtNodePos;
     }
     int getPrevWordCodePointAt(const int id) const {
@@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
     int16_t mPrevWordLength;
     int16_t mPrevWordStart;
     int16_t mPrevWordProbability;
-    int mPrevWordNodePos;
+    int mPrevWordPtNodePos;
     int mSecondWordFirstInputIndex;
 };
 } // namespace latinime

View File

@@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
 int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
         const bool forceLowerCaseSearch) const {
     if (0 >= prevWordLength) return NOT_A_DICT_POS;
-    int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
+    int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
             forceLowerCaseSearch);
     if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
     return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
     int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
     // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
     if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
-    int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
+    int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
            false /* forceLowerCaseSearch */);
     if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;

View File

@@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
 }
 int Dictionary::getProbability(const int *word, int length) const {
-    int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
+    int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
            false /* forceLowerCaseSearch */);
     if (NOT_A_DICT_POS == pos) {
         return NOT_A_PROBABILITY;

View File

@@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy {
     virtual int getRootPosition() const = 0;
-    virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
+    virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
             DicNodeVector *const childDicNodes) const = 0;
     virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
             const int nodePos, const int maxCodePointCount, int *const outCodePoints,
             int *const outUnigramProbability) const = 0;
-    virtual int getTerminalNodePositionOfWord(const int *const inWord,
+    virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
             const int length, const bool forceLowerCaseSearch) const = 0;
     virtual int getProbability(const int unigramProbability,

View File

@@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
             ->getMultiWordCostMultiplier();
     mSuggestOptions = suggestOptions;
     if (!prevWord) {
-        mPrevWordPos = NOT_A_DICT_POS;
+        mPrevWordPtNodePos = NOT_A_DICT_POS;
         return;
     }
     // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
-    mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+    mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
             prevWord, prevWordLength, false /* forceLowerCaseSearch */);
-    if (mPrevWordPos == NOT_A_DICT_POS) {
+    if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
         // Check bigrams for lower-cased previous word if original was not found. Useful for
         // auto-capitalized words like "The [current_word]".
-        mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
+        mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
                 prevWord, prevWordLength, true /* forceLowerCaseSearch */);
     }
 }

View File

@@ -59,7 +59,7 @@ class DicTraverseSession {
     }
     AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
-            : mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
+            : mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
               mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
               mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
               mMultiWordCostMultiplier(1.0f) {
@@ -86,11 +86,9 @@ class DicTraverseSession {
     //--------------------
     const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
     const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
-    int getPrevWordPos() const { return mPrevWordPos; }
+    int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
     // TODO: REMOVE
-    void setPrevWordPos(int pos) { mPrevWordPos = pos; }
-    // TODO: Use proper parameter when changed
-    int getDicRootPos() const { return 0; }
+    void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
     DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
     MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
     const ProximityInfoState *getProximityInfoState(int id) const {
@@ -119,26 +117,13 @@ class DicTraverseSession {
         return true;
     }
-    void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
-        for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
-            if (!mProximityInfoStates[i].isUsed()) {
-                continue;
-            }
-            const int pointerId = node->getInputIndex(i);
-            const std::vector<int> *const searchKeyVector =
-                    mProximityInfoStates[i].getSearchKeyVector(pointerId);
-            outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
-                    searchKeyVector->end());
-        }
-    }
-    ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
+    ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
         ProximityType proximityType = UNRELATED_CHAR;
         for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
             if (!mProximityInfoStates[i].isUsed()) {
                 continue;
             }
-            const int pointerId = node->getInputIndex(i);
+            const int pointerId = dicNode->getInputIndex(i);
             proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
             ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
             // TODO: Make this more generic
@@ -192,7 +177,7 @@ class DicTraverseSession {
             const int *const inputYs, const int *const times, const int *const pointerIds,
             const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
-    int mPrevWordPos;
+    int mPrevWordPtNodePos;
     const ProximityInfo *mProximityInfo;
     const Dictionary *mDictionary;
     const SuggestOptions *mSuggestOptions;

View File

@@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
         // Continue suggestion after partial commit.
         DicNode *topDicNode =
                 traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
-        traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
+        traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
         traverseSession->getDicTraverseCache()->continueSearch();
         traverseSession->setPartiallyCommited();
     }
@@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
         // Create a new dic node here
         DicNode rootNode;
         DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
-                traverseSession->getPrevWordPos(), &rootNode);
+                traverseSession->getPrevWordPtNodePos(), &rootNode);
         traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
     }
 }
@@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
         BinaryDictionaryShortcutIterator shortcutIt(
                 traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
                 traverseSession->getDictionaryStructurePolicy()
-                        ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
+                        ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
         // Shortcut is not supported for multiple words suggestions.
         // TODO: Check shortcuts during traversal for multiple words suggestions.
         const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
@@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
                 }
                 break;
             case UNRELATED_CHAR:
-                // Just drop this node and do nothing.
+                // Just drop this dicNode and do nothing.
                 break;
             default:
-                // Just drop this node and do nothing.
+                // Just drop this dicNode and do nothing.
                 break;
         }
     }
-    // Push the node for look-ahead correction
+    // Push the dicNode for look-ahead correction
     if (allowsErrorCorrections && canDoLookAheadCorrection) {
         traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
     }
@@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
     if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
         return;
     }
-    if (!dicNode->isTerminalWordNode()) {
+    if (!dicNode->isTerminalDicNode()) {
         return;
     }
     if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
 /**
  * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
- * (by the space omission error correction) search path if input dicNode is on a terminal node.
+ * (by the space omission error correction) search path if input dicNode is on a terminal.
  */
 void Suggest::processExpandedDicNode(
         DicTraverseSession *traverseSession, DicNode *dicNode) const {
@@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
     processExpandedDicNode(traverseSession, childDicNode);
 }
-// Process the node codepoint as a digraph. This means that composite glyphs like the German
+// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
 // u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
 // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
 void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
 /**
  * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
  * matches for all possible next letters. Note that just skipping the current letter without any
- * other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
+ * other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
  * the possible *next* letters after the omission to better limit search to plausible omissions.
  * Note that apostrophes are handled as omissions.
  */
@@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
 }
 /**
- * Weight child node by aligning it to the key
+ * Weight child dicNode by aligning it to the key
 */
 void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
     const int inputSize = traverseSession->getInputSize();

View File

@@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
 const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
         DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
-void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
         DicNodeVector *const childDicNodes) const {
     if (!dicNode->hasChildren()) {
         return;
     }
     DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
             getBigramsStructurePolicy(), getShortcutsStructurePolicy());
-    readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
+    readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
     const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
     while (!readingHelper.isEnd()) {
         childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
@@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
     return codePointCount;
 }
-int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
         const int length, const bool forceLowerCaseSearch) const {
     int searchCodePoints[length];
     for (int i = 0; i < length; ++i) {
@@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
         AKLOGE("The dictionary is too large to dynamically update.");
         return false;
     }
-    const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+    const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
            false /* forceLowerCaseSearch */);
     if (word0Pos == NOT_A_DICT_POS) {
         return false;
     }
-    const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+    const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
            false /* forceLowerCaseSearch */);
     if (word1Pos == NOT_A_DICT_POS) {
         return false;
@@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
         AKLOGE("The dictionary is too large to dynamically update.");
         return false;
     }
-    const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
+    const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
           false /* forceLowerCaseSearch */);
     if (word0Pos == NOT_A_DICT_POS) {
         return false;
    }
-    const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
+    const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
           false /* forceLowerCaseSearch */);
     if (word1Pos == NOT_A_DICT_POS) {
         return false;

View File

@@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
         return 0;
     }
-    void createAndGetAllChildNodes(const DicNode *const dicNode,
+    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
             DicNodeVector *const childDicNodes) const;
     int getCodePointsAndProbabilityAndReturnCodePointCount(
             const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
             int *const outUnigramProbability) const;
-    int getTerminalNodePositionOfWord(const int *const inWord,
+    int getTerminalPtNodePositionOfWord(const int *const inWord,
             const int length, const bool forceLowerCaseSearch) const;
     int getProbability(const int unigramProbability, const int bigramProbability) const;

View File

@@ -22,7 +22,7 @@ namespace latinime {
 // To avoid infinite loop caused by invalid or malicious forward links.
 const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
-const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
+const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
 const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
 // Visits all PtNodes in post-order depth first manner.
@@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
         mReadingState.mPos = NOT_A_DICT_POS;
         return;
     }
-    mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
+    mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
     const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
     const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
     if (usesAdditionalBuffer) {
         mReadingState.mPos -= mBuffer->getOriginalBufferSize();
     }
-    mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
-            dictBuf, &mReadingState.mPos);
+    mReadingState.mRemainingPtNodeCountInThisArray =
+            PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
+                    &mReadingState.mPos);
     if (usesAdditionalBuffer) {
         mReadingState.mPos += mBuffer->getOriginalBufferSize();
     }
     // Count up nodes and node arrays to avoid infinite loop.
-    mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
-    mReadingState.mNodeArrayCount++;
-    if (mReadingState.mNodeCount < 0
-            || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
-            || mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
+    mReadingState.mTotalPtNodeIndexInThisArrayChain +=
+            mReadingState.mRemainingPtNodeCountInThisArray;
+    mReadingState.mPtNodeArrayIndexInThisArrayChain++;
+    if (mReadingState.mRemainingPtNodeCountInThisArray < 0
+            || mReadingState.mTotalPtNodeIndexInThisArrayChain
+                    > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
+            || mReadingState.mPtNodeArrayIndexInThisArrayChain
+                    > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
         // Invalid dictionary.
         AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
                 "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
-                mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
-                MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
-                MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
+                mReadingState.mRemainingPtNodeCountInThisArray,
+                mReadingState.mTotalPtNodeIndexInThisArrayChain,
+                MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
+                mReadingState.mPtNodeArrayIndexInThisArrayChain,
+                MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
         ASSERT(false);
         mIsError = true;
         mReadingState.mPos = NOT_A_DICT_POS;
         return;
     }
-    if (mReadingState.mNodeCount == 0) {
+    if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
         // Empty node array. Try following forward link.
         followForwardLink();
     }

View File

@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper {
} else { } else {
mIsError = false; mIsError = false;
mReadingState.mPos = ptNodeArrayPos; mReadingState.mPos = ptNodeArrayPos;
mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalCodePointCountSinceInitialization = 0;
mReadingState.mTotalNodeCount = 0; mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
mReadingState.mNodeArrayCount = 0; mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
nextPtNodeArray(); nextPtNodeArray();
@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper {
} else { } else {
mIsError = false; mIsError = false;
mReadingState.mPos = ptNodePos; mReadingState.mPos = ptNodePos;
mReadingState.mNodeCount = 1; mReadingState.mRemainingPtNodeCountInThisArray = 1;
mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mTotalCodePointCountSinceInitialization = 0;
mReadingState.mTotalNodeCount = 1; mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
mReadingState.mNodeArrayCount = 1; mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
fetchPtNodeInfo(); fetchPtNodeInfo();
} }
@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper {
// Return code point count exclude the last read node's code points. // Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const { AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
return mReadingState.mPrevTotalCodePointCount; return mReadingState.mTotalCodePointCountSinceInitialization;
} }
// Return code point count include the last read node's code points. // Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount() const { AK_FORCE_INLINE int getTotalCodePointCount() const {
return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); return mReadingState.mTotalCodePointCountSinceInitialization
+ mNodeReader.getCodePointCount();
} }
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
@@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper {
     }

     AK_FORCE_INLINE void readNextSiblingNode() {
-        mReadingState.mNodeCount -= 1;
+        mReadingState.mRemainingPtNodeCountInThisArray -= 1;
         mReadingState.mPos = mNodeReader.getSiblingNodePos();
-        if (mReadingState.mNodeCount <= 0) {
+        if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
             // All nodes in the current node array have been read.
             followForwardLink();
             if (!isEnd()) {
@@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper {
     // Read the first child node of the current node.
     AK_FORCE_INLINE void readChildNode() {
         if (mNodeReader.hasChildren()) {
-            mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
-            mReadingState.mTotalNodeCount = 0;
-            mReadingState.mNodeArrayCount = 0;
+            mReadingState.mTotalCodePointCountSinceInitialization +=
+                    mNodeReader.getCodePointCount();
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
             mReadingState.mPos = mNodeReader.getChildrenPos();
             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
             // Read children node array.
@@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper {
     // Read the parent node of the current node.
     AK_FORCE_INLINE void readParentNode() {
         if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
-            mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
-            mReadingState.mTotalNodeCount = 1;
-            mReadingState.mNodeArrayCount = 1;
-            mReadingState.mNodeCount = 1;
+            mReadingState.mTotalCodePointCountSinceInitialization +=
+                    mNodeReader.getCodePointCount();
+            mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
+            mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
+            mReadingState.mRemainingPtNodeCountInThisArray = 1;
             mReadingState.mPos = mNodeReader.getParentPos();
             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
-            mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
+            mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
             fetchPtNodeInfo();
         } else {
             mReadingState.mPos = NOT_A_DICT_POS;
@@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper {
     }

     AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
-        return mReadingState.mPosOfLastPtNodeArrayHead;
+        return mReadingState.mPosOfThisPtNodeArrayHead;
     }

     AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
@@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper {
  private:
    DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);

-    class ReadingState {
+    // This class encapsulates the reading state of a position in the dictionary. It points at a
+    // specific PtNode in the dictionary.
+    class PtNodeReadingState {
      public:
         // Note that copy constructor and assignment operator are used for this class to use
         // std::vector.
-        ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
-                mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
-                mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
+        PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
+                mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
+                mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
+                mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}

         int mPos;
-        // Node count of a node array.
-        int mNodeCount;
-        int mPrevTotalCodePointCount;
-        int mTotalNodeCount;
-        int mNodeArrayCount;
+        // Remaining node count in the current array.
+        int mRemainingPtNodeCountInThisArray;
+        int mTotalCodePointCountSinceInitialization;
+        // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
+        int mTotalPtNodeIndexInThisArrayChain;
+        // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+        // PtNode arrays.
+        int mPtNodeArrayIndexInThisArrayChain;
         int mPosOfLastForwardLinkField;
-        int mPosOfLastPtNodeArrayHead;
+        int mPosOfThisPtNodeArrayHead;
     };

     static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
-    static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
+    static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
     static const size_t MAX_READING_STATE_STACK_SIZE;

     // TODO: Introduce error code to track what caused the error.
     bool mIsError;
-    ReadingState mReadingState;
+    PtNodeReadingState mReadingState;
     const BufferWithExtendableBuffer *const mBuffer;
     DynamicPatriciaTrieNodeReader mNodeReader;
     int mMergedNodeCodePoints[MAX_WORD_LENGTH];
-    std::vector<ReadingState> mReadingStateStack;
+    std::vector<PtNodeReadingState> mReadingStateStack;

     void nextPtNodeArray();
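The two counters introduced above, mTotalPtNodeIndexInThisArrayChain and mPtNodeArrayIndexInThisArrayChain, exist only so the reader can bail out when a broken or malicious dictionary links PtNode arrays into a cycle. What follows is a minimal standalone sketch of that guard, not the LatinIME implementation: the PtNodeArray struct, the followChain() helper, and the value of the limit are invented for illustration; only the constant names echo the diff.

#include <cstdio>
#include <vector>

namespace {
constexpr int NOT_A_DICT_POS = -1;
// Hypothetical limit; the real constant is defined elsewhere in the reading helper.
constexpr int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;

struct PtNodeArray {
    int ptNodeCount;     // number of PtNodes stored in this array
    int forwardLinkPos;  // index of the next array in the chain, or NOT_A_DICT_POS
};

// Follows forward links until a non-empty array or the end of the chain is reached.
// Returns the position of that array, or NOT_A_DICT_POS on error or end of chain.
int followChain(const std::vector<PtNodeArray> &arrays, int pos) {
    int ptNodeArrayIndexInThisArrayChain = 0;
    while (pos != NOT_A_DICT_POS) {
        if (++ptNodeArrayIndexInThisArrayChain > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
            // Broken or malicious dictionary: no valid file has a chain this long.
            return NOT_A_DICT_POS;
        }
        const PtNodeArray &ptNodeArray = arrays[pos];
        if (ptNodeArray.ptNodeCount > 0) {
            return pos;  // found a readable, non-empty PtNode array
        }
        // Empty PtNode array: try following the forward link.
        pos = ptNodeArray.forwardLinkPos;
    }
    return NOT_A_DICT_POS;
}
}  // namespace

int main() {
    // Two empty arrays linking to each other would loop forever without the counter.
    const std::vector<PtNodeArray> arrays = {{0, 1}, {0, 0}};
    std::printf("result: %d\n", followChain(arrays, 0));  // prints -1 instead of hanging
    return 0;
}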

View File

@@ -25,12 +25,12 @@
 namespace latinime {

-void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
+void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
         DicNodeVector *const childDicNodes) const {
     if (!dicNode->hasChildren()) {
         return;
     }
-    int nextPos = dicNode->getChildrenPos();
+    int nextPos = dicNode->getChildrenPtNodeArrayPos();
     if (nextPos < 0 || nextPos >= mDictBufferSize) {
         AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
                 nextPos, mDictBufferSize);
@@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
 // This retrieves code points and the probability of the word by its terminal position.
 // Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
-// it is possible to check for this with advantageous complexity. For each node, we search
+// it is possible to check for this with advantageous complexity. For each PtNode array, we search
 // for PtNodes with children and compare the children position with the position we look for.
 // When we shoot the position we look for, it means the word we look for is in the children
 // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
 // PtNode array with the last PtNode's children position still less than what we are searching for,
 // we must descend the last PtNode's children (for example, if the word we are searching for starts
 // with a z, it's the last PtNode of the root array, so all children addresses will be smaller
-// than the position we look for, and we have to descend the z node).
+// than the position we look for, and we have to descend the z PtNode).
 /* Parameters :
  * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
  * what is stored as the "bigram position" in each bigram)
@@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
     int pos = getRootPosition();
     int wordPos = 0;
     // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
-    // only traverse nodes that are actually a part of the terminal we are searching, so each time
-    // we enter this loop we are one depth level further than last time.
-    // The only reason we count nodes is because we want to reduce the probability of infinite
+    // only traverse PtNodes that are actually a part of the terminal we are searching, so each
+    // time we enter this loop we are one depth level further than last time.
+    // The only reason we count PtNodes is because we want to reduce the probability of infinite
     // looping in case there is a bug. Since we know there is an upper bound to the depth we are
     // supposed to traverse, it does not hurt to count iterations.
     for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
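To make the comment blocks above concrete, here is a self-contained sketch of the same search on a toy in-memory trie. It is not this function's code and not the binary dictionary format: the Node struct, the buildWord() helper, and the flat index-based layout are made up for illustration. The control flow, however, follows the description: scan each PtNode array, remember the last node whose children position does not pass the target, and descend it once we shoot past the target or run out of siblings.

#include <cassert>
#include <string>
#include <vector>

namespace {
constexpr int NOT_A_POS = -1;

struct Node {
    char codePoint;     // single character per node, for simplicity
    int childrenStart;  // index of the first child, or NOT_A_POS
    int childrenCount;  // number of children laid out contiguously from childrenStart
};

// Reconstructs the word whose terminal node sits at index |terminalPos| in |nodes|,
// assuming a strict breadth-first layout: nodes[0..rootCount) is the root array and
// children positions only grow as we scan an array from left to right.
std::string buildWord(const std::vector<Node> &nodes, const int rootCount,
        const int terminalPos) {
    std::string word;
    int arrayStart = 0;
    int arrayCount = rootCount;
    while (true) {
        int lastCandidate = NOT_A_POS;
        bool descended = false;
        for (int i = 0; i < arrayCount; ++i) {
            const int pos = arrayStart + i;
            const Node &node = nodes[pos];
            if (pos == terminalPos) {
                word += node.codePoint;  // reached the terminal: the word is complete
                return word;
            }
            if (node.childrenStart != NOT_A_POS && node.childrenStart <= terminalPos) {
                lastCandidate = pos;  // still before the target: remember this node
            }
            const bool shotPast =
                    node.childrenStart != NOT_A_POS && node.childrenStart > terminalPos;
            if (shotPast || i == arrayCount - 1) {
                // We passed the target (or this is the last sibling): the target must be
                // a descendant of the last candidate, so descend into its children.
                assert(lastCandidate != NOT_A_POS);
                word += nodes[lastCandidate].codePoint;
                arrayStart = nodes[lastCandidate].childrenStart;
                arrayCount = nodes[lastCandidate].childrenCount;
                descended = true;
                break;
            }
        }
        if (!descended) {
            return word;  // malformed input; never reached with a valid layout
        }
    }
}
}  // namespace

int main() {
    // Tiny breadth-first trie: root array {a, z}; a's child {b}; z's child {c}.
    // Indices:                              0    1             2             3
    const std::vector<Node> nodes = {
            {'a', 2, 1}, {'z', 3, 1}, {'b', NOT_A_POS, 0}, {'c', NOT_A_POS, 0}};
    assert(buildWord(nodes, 2 /* rootCount */, 3 /* terminalPos */) == "zc");
    assert(buildWord(nodes, 2 /* rootCount */, 2 /* terminalPos */) == "ab");
    return 0;
}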
@@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                     found = true;
                 } else if (1 >= ptNodeCount) {
                     // However if we are on the LAST PtNode of this array, and we have NOT shot the
-                    // position we should descend THIS node. So we trick the lastCandidatePtNodePos
-                    // so that we will descend this PtNode, not the previous one.
+                    // position we should descend THIS PtNode. So we trick the
+                    // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
+                    // one.
                     lastCandidatePtNodePos = startPos;
                     found = true;
                 } else {
@@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                     found = false;
                 }
             } else {
-                // Even if we don't have children here, we could still be on the last PtNode of /
+                // Even if we don't have children here, we could still be on the last PtNode of
                 // this array. If this is the case, we should descend the last PtNode that had
                 // children, and their position is already in lastCandidatePtNodePos.
                 found = (1 >= ptNodeCount);
@@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
     return 0;
 }

-// This function gets the position of the terminal node of the exact matching word in the
+// This function gets the position of the terminal PtNode of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_A_DICT_POS.
-int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
+int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
         const int length, const bool forceLowerCaseSearch) const {
     int pos = getRootPosition();
     int wordPos = 0;
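As a companion to the previous sketch, the forward direction implemented by getTerminalPtNodePositionOfWord, walking down one trie level per input character and returning the terminal position on an exact match, can be shown on the same kind of toy trie. Again, findTerminalPos() and the Node layout are invented for illustration; they are not the policy's real code, which works on the binary dictionary buffer and supports forceLowerCaseSearch.

#include <cassert>
#include <string>
#include <vector>

namespace {
constexpr int NOT_A_POS = -1;

struct Node {
    char codePoint;
    int childrenStart;  // index of the first child, or NOT_A_POS
    int childrenCount;
    bool isTerminal;    // true if a word ends at this node
};

// Returns the index of the terminal node spelling |word| exactly, or NOT_A_POS.
int findTerminalPos(const std::vector<Node> &nodes, const int rootCount,
        const std::string &word) {
    int arrayStart = 0;
    int arrayCount = rootCount;
    int matched = NOT_A_POS;
    for (const char c : word) {
        matched = NOT_A_POS;
        for (int i = 0; i < arrayCount; ++i) {
            if (nodes[arrayStart + i].codePoint == c) {
                matched = arrayStart + i;
                break;
            }
        }
        if (matched == NOT_A_POS) {
            return NOT_A_POS;  // no sibling in this array carries the next character
        }
        arrayStart = nodes[matched].childrenStart;
        arrayCount = (arrayStart == NOT_A_POS) ? 0 : nodes[matched].childrenCount;
    }
    // The whole input was consumed; it is a word only if the last node is a terminal.
    return (matched != NOT_A_POS && nodes[matched].isTerminal) ? matched : NOT_A_POS;
}
}  // namespace

int main() {
    // Same toy layout as before: root {a, z}; a -> {b}; z -> {c}; "ab" and "zc" are words.
    const std::vector<Node> nodes = {
            {'a', 2, 1, false}, {'z', 3, 1, false},
            {'b', NOT_A_POS, 0, true}, {'c', NOT_A_POS, 0, true}};
    assert(findTerminalPos(nodes, 2, "zc") == 3);
    assert(findTerminalPos(nodes, 2, "ab") == 2);
    assert(findTerminalPos(nodes, 2, "zz") == NOT_A_POS);
    return 0;
}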

View File

@@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
         return 0;
     }

-    void createAndGetAllChildNodes(const DicNode *const dicNode,
+    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
             DicNodeVector *const childDicNodes) const;

     int getCodePointsAndProbabilityAndReturnCodePointCount(
             const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
             int *const outUnigramProbability) const;

-    int getTerminalNodePositionOfWord(const int *const inWord,
+    int getTerminalPtNodePositionOfWord(const int *const inWord,
             const int length, const bool forceLowerCaseSearch) const;

     int getProbability(const int unigramProbability, const int bigramProbability) const;

View File

@@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
             return false;
         }
         const int point0Index = dicNode->getInputIndex(0);
-        return dicNode->isTerminalWordNode()
+        return dicNode->isTerminalDicNode()
                 && traverseSession->getProximityInfoState(0)->
                         hasSpaceProximity(point0Index);
     }
@@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
         if (dicNode->isCompletion(inputSize)) {
             return false;
         }
-        if (!dicNode->isTerminalWordNode()) {
+        if (!dicNode->isTerminalDicNode()) {
             return false;
         }
         const int16_t pointIndex = dicNode->getInputIndex(0);