Merge "Make "node"s clear by renaming to PtNode or DicNode."

This commit is contained in:
Keisuke Kuroyanagi 2013-10-17 08:23:01 +00:00 committed by Android (Google) Code Review
commit be5e8f18e9
19 changed files with 233 additions and 231 deletions

View file

@ -99,7 +99,7 @@ class DicNode {
virtual ~DicNode() {}
// Init for copy
void initByCopy(const DicNode *dicNode) {
void initByCopy(const DicNode *const dicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
@ -107,25 +107,25 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
// Init for root with prevWordNodePos which is used for bigram
void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
// Init for root with prevWordPtNodePos which is used for bigram
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeState.init(prevWordNodePos);
mDicNodeState.init(prevWordPtNodePos);
PROF_NODE_RESET(mProfiler);
}
// Init for root with previous word
void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
0 /* terminalDepth */);
@ -138,7 +138,7 @@ class DicNode {
mDicNodeState.mDicNodeStatePrevWord.init(
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
dicNode->mDicNodeProperties.getProbability(),
dicNode->mDicNodeProperties.getPos(),
dicNode->mDicNodeProperties.getPtNodePos(),
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
dicNode->getOutputWordBuf(),
@ -148,26 +148,27 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
void initAsPassingChild(DicNode *parentNode) {
void initAsPassingChild(DicNode *parentDicNode) {
mIsUsed = true;
mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
const int c = parentNode->getNodeTypedCodePoint();
mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
mDicNodeState.init(&parentNode->mDicNodeState);
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
mDicNodeState.init(&parentDicNode->mDicNodeState);
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
}
void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
const int probability, const bool isTerminal, const bool hasChildren,
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
void initAsChild(const DicNode *const dicNode, const int ptNodePos,
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -234,7 +235,7 @@ class DicNode {
}
bool isFirstWord() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
}
bool isCompletion(const int inputSize) const {
@ -246,29 +247,30 @@ class DicNode {
}
// Used to get bigram probability in DicNodeUtils
int getPos() const {
return mDicNodeProperties.getPos();
int getPtNodePos() const {
return mDicNodeProperties.getPtNodePos();
}
// Used to get bigram probability in DicNodeUtils
int getPrevWordPos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
int getPrevWordTerminalPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
// Used in DicNodeUtils
int getChildrenPos() const {
return mDicNodeProperties.getChildrenPos();
int getChildrenPtNodeArrayPos() const {
return mDicNodeProperties.getChildrenPtNodeArrayPos();
}
int getProbability() const {
return mDicNodeProperties.getProbability();
}
AK_FORCE_INLINE bool isTerminalWordNode() const {
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
const int currentNodeDepth = getNodeCodePointCount();
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
AK_FORCE_INLINE bool isTerminalDicNode() const {
const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
const int currentDicNodeDepth = getNodeCodePointCount();
const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
return isTerminalPtNode && currentDicNodeDepth > 0
&& currentDicNodeDepth == terminalDicNodeDepth;
}
bool shouldBeFilteredBySafetyNetForBigram() const {
@ -374,8 +376,8 @@ class DicNode {
}
// Used to commit input partially
int getPrevWordNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
int getPrevWordPtNodePos() const {
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
@ -410,7 +412,7 @@ class DicNode {
// TODO: Remove once touch path is merged into ProximityInfoState
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
int getNodeCodePoint() const {
const int codePoint = mDicNodeProperties.getNodeCodePoint();
const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
const DigraphUtils::DigraphCodePointIndex digraphIndex =
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
@ -423,8 +425,8 @@ class DicNode {
// Utils for cost calculation //
////////////////////////////////
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
return mDicNodeProperties.getNodeCodePoint()
== dicNode->mDicNodeProperties.getNodeCodePoint();
return mDicNodeProperties.getDicNodeCodePoint()
== dicNode->mDicNodeProperties.getDicNodeCodePoint();
}
// TODO: remove

View file

@ -22,7 +22,6 @@
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@ -32,19 +31,20 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *const newRootNode) {
newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
const int prevWordPtNodePos, DicNode *const newRootDicNode) {
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
newRootNode->initAsRootWithPreviousWord(
prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
newRootDicNode->initAsRootWithPreviousWord(
prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
destNode->initByCopy(srcNode);
/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
DicNode *const destDicNode) {
destDicNode->initByCopy(srcDicNode);
}
///////////////////////////////////
@ -52,14 +52,14 @@ namespace latinime {
///////////////////////////////////
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes) {
DicNodeVector *const childDicNodes) {
if (dicNode->isTotalInputSizeExceedingLimit()) {
return;
}
if (!dicNode->isLeavingNode()) {
childDicNodes->pushPassingChild(dicNode);
} else {
dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
}
}
@ -71,11 +71,11 @@ namespace latinime {
*/
/* static */ float DicNodeUtils::getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
@ -85,19 +85,19 @@ namespace latinime {
/* static */ int DicNodeUtils::getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap) {
const int unigramProbability = node->getProbability();
const int wordPos = node->getPos();
const int prevWordPos = node->getPrevWordPos();
if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
const int unigramProbability = dicNode->getProbability();
const int ptNodePos = dicNode->getPtNodePos();
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
wordPos, unigramProbability);
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
@ -109,7 +109,7 @@ namespace latinime {
// TODO: Move to char_utils?
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
const int *const src1, const int16_t length1, int *dest) {
const int *const src1, const int16_t length1, int *const dest) {
int actualLength0 = 0;
for (int i = 0; i < length0; ++i) {
if (src0[i] == 0) {

View file

@ -31,20 +31,20 @@ class MultiBigramMap;
class DicNodeUtils {
public:
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
const int16_t length1, int *dest);
const int16_t length1, int *const dest);
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordNodePos, DicNode *newRootNode);
const int prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNode *prevWordLastNode, DicNode *newRootNode);
static void initByCopy(DicNode *srcNode, DicNode *destNode);
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
static void getAllChildDicNodes(DicNode *dicNode,
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
DicNodeVector *childDicNodes);
static float getBigramNodeImprobability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *const multiBigramMap);
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
@ -53,7 +53,7 @@ class DicNodeUtils {
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const node, MultiBigramMap *multiBigramMap);
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H

View file

@ -62,14 +62,14 @@ class DicNodeVector {
mDicNodes.back().initAsPassingChild(dicNode);
}
void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
const int probability, const bool isTerminal, const bool hasChildren,
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}

View file

@ -24,15 +24,14 @@
namespace latinime {
/**
* Node for traversing the lexicon trie.
* PtNode information related to the DicNode from the lexicon trie.
*/
// TODO: Introduce a dictionary node class which has attribute members required to understand the
// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
: mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
mDepth(0), mLeavingDepth(0) {}
virtual ~DicNodeProperties() {}
@ -40,57 +39,57 @@ class DicNodeProperties {
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth) {
mPos = pos;
mChildrenPos = childrenPos;
mNodeCodePoint = nodeCodePoint;
mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
mHasChildren = hasChildren;
mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
}
// Init for copy
void init(const DicNodeProperties *const nodeProp) {
mPos = nodeProp->mPos;
mChildrenPos = nodeProp->mChildrenPos;
mNodeCodePoint = nodeProp->mNodeCodePoint;
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth;
mLeavingDepth = nodeProp->mLeavingDepth;
void init(const DicNodeProperties *const dicNodeProp) {
mPtNodePos = dicNodeProp->mPtNodePos;
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
mProbability = dicNodeProp->mProbability;
mIsTerminal = dicNodeProp->mIsTerminal;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth;
}
// Init as passing child
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
mPos = nodeProp->mPos;
mChildrenPos = nodeProp->mChildrenPos;
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = nodeProp->mProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasChildren = nodeProp->mHasChildren;
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = nodeProp->mLeavingDepth;
void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
mPtNodePos = dicNodeProp->mPtNodePos;
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = dicNodeProp->mProbability;
mIsTerminal = dicNodeProp->mIsTerminal;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth;
}
int getPos() const {
return mPos;
int getPtNodePos() const {
return mPtNodePos;
}
int getChildrenPos() const {
return mChildrenPos;
int getChildrenPtNodeArrayPos() const {
return mChildrenPtNodeArrayPos;
}
int getProbability() const {
return mProbability;
}
int getNodeCodePoint() const {
return mNodeCodePoint;
int getDicNodeCodePoint() const {
return mDicNodeCodePoint;
}
uint16_t getDepth() const {
@ -107,7 +106,7 @@ class DicNodeProperties {
}
bool hasChildren() const {
return mHasChildren || mDepth != mLeavingDepth;
return mHasChildrenPtNodes || mDepth != mLeavingDepth;
}
bool isBlacklistedOrNotAWord() const {
@ -118,12 +117,12 @@ class DicNodeProperties {
// Caution!!!
// Use a default copy constructor and an assign operator because shallow copies are ok
// for this class
int mPos;
int mChildrenPos;
int mPtNodePos;
int mChildrenPtNodeArrayPos;
int mProbability;
int mNodeCodePoint;
int mDicNodeCodePoint;
bool mIsTerminal;
bool mHasChildren;
bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;

View file

@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
public:
AK_FORCE_INLINE DicNodeStatePrevWord()
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
memset(mPrevWord, 0, sizeof(mPrevWord));
}
@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
mPrevWordNodePos = NOT_A_DICT_POS;
mPrevWordPtNodePos = NOT_A_DICT_POS;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = 0;
mPrevWordStart = 0;
mPrevWordProbability = -1;
mPrevWordNodePos = prevWordNodePos;
mPrevWordPtNodePos = prevWordNodePos;
mSecondWordFirstInputIndex = NOT_AN_INDEX;
}
@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
mPrevWordCount = prevWord->mPrevWordCount;
mPrevWordStart = prevWord->mPrevWordStart;
mPrevWordProbability = prevWord->mPrevWordProbability;
mPrevWordNodePos = prevWord->mPrevWordNodePos;
mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
}
@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
mPrevWordProbability = prevWordProbability;
mPrevWordNodePos = prevWordNodePos;
mPrevWordPtNodePos = prevWordNodePos;
int twoWordsLen =
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
if (twoWordsLen >= MAX_WORD_LENGTH) {
@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
return mPrevWordStart;
}
int getPrevWordNodePos() const {
return mPrevWordNodePos;
int getPrevWordPtNodePos() const {
return mPrevWordPtNodePos;
}
int getPrevWordCodePointAt(const int id) const {
@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
int16_t mPrevWordLength;
int16_t mPrevWordStart;
int16_t mPrevWordProbability;
int mPrevWordNodePos;
int mPrevWordPtNodePos;
int mSecondWordFirstInputIndex;
};
} // namespace latinime

View file

@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;

View file

@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
}
int Dictionary::getProbability(const int *word, int length) const {
int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == pos) {
return NOT_A_PROBABILITY;

View file

@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy {
virtual int getRootPosition() const = 0;
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
virtual int getTerminalNodePositionOfWord(const int *const inWord,
virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getProbability(const int unigramProbability,

View file

@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions;
if (!prevWord) {
mPrevWordPos = NOT_A_DICT_POS;
mPrevWordPtNodePos = NOT_A_DICT_POS;
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
if (mPrevWordPos == NOT_A_DICT_POS) {
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}

View file

@ -59,7 +59,7 @@ class DicTraverseSession {
}
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
: mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
: mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
mMultiWordCostMultiplier(1.0f) {
@ -86,11 +86,9 @@ class DicTraverseSession {
//--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
int getPrevWordPos() const { return mPrevWordPos; }
int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
// TODO: REMOVE
void setPrevWordPos(int pos) { mPrevWordPos = pos; }
// TODO: Use proper parameter when changed
int getDicRootPos() const { return 0; }
void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {
@ -119,26 +117,13 @@ class DicTraverseSession {
return true;
}
void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
const int pointerId = node->getInputIndex(i);
const std::vector<int> *const searchKeyVector =
mProximityInfoStates[i].getSearchKeyVector(pointerId);
outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
searchKeyVector->end());
}
}
ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
ProximityType proximityType = UNRELATED_CHAR;
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
if (!mProximityInfoStates[i].isUsed()) {
continue;
}
const int pointerId = node->getInputIndex(i);
const int pointerId = dicNode->getInputIndex(i);
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
// TODO: Make this more generic
@ -192,7 +177,7 @@ class DicTraverseSession {
const int *const inputYs, const int *const times, const int *const pointerIds,
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
int mPrevWordPos;
int mPrevWordPtNodePos;
const ProximityInfo *mProximityInfo;
const Dictionary *mDictionary;
const SuggestOptions *mSuggestOptions;

View file

@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Continue suggestion after partial commit.
DicNode *topDicNode =
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
traverseSession->getDicTraverseCache()->continueSearch();
traverseSession->setPartiallyCommited();
}
@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
traverseSession->getPrevWordPos(), &rootNode);
traverseSession->getPrevWordPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}
@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
}
break;
case UNRELATED_CHAR:
// Just drop this node and do nothing.
// Just drop this dicNode and do nothing.
break;
default:
// Just drop this node and do nothing.
// Just drop this dicNode and do nothing.
break;
}
}
// Push the node for look-ahead correction
// Push the dicNode for look-ahead correction
if (allowsErrorCorrections && canDoLookAheadCorrection) {
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
}
@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
return;
}
if (!dicNode->isTerminalWordNode()) {
if (!dicNode->isTerminalDicNode()) {
return;
}
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
/**
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
* (by the space omission error correction) search path if input dicNode is on a terminal node.
* (by the space omission error correction) search path if input dicNode is on a terminal.
*/
void Suggest::processExpandedDicNode(
DicTraverseSession *traverseSession, DicNode *dicNode) const {
@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
processExpandedDicNode(traverseSession, childDicNode);
}
// Process the node codepoint as a digraph. This means that composite glyphs like the German
// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
/**
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
* matches for all possible next letters. Note that just skipping the current letter without any
* other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
* other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
* the possible *next* letters after the omission to better limit search to plausible omissions.
* Note that apostrophes are handled as omissions.
*/
@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
}
/**
* Weight child node by aligning it to the key
* Weight child dicNode by aligning it to the key
*/
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
const int inputSize = traverseSession->getInputSize();

View file

@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
return codePointCount;
}
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;

View file

@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return 0;
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(const int *const inWord,
int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;

View file

@ -22,7 +22,7 @@ namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
}
mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
dictBuf, &mReadingState.mPos);
mReadingState.mRemainingPtNodeCountInThisArray =
PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
&mReadingState.mPos);
if (usesAdditionalBuffer) {
mReadingState.mPos += mBuffer->getOriginalBufferSize();
}
// Count up nodes and node arrays to avoid infinite loop.
mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
mReadingState.mNodeArrayCount++;
if (mReadingState.mNodeCount < 0
|| mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|| mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
mReadingState.mTotalPtNodeIndexInThisArrayChain +=
mReadingState.mRemainingPtNodeCountInThisArray;
mReadingState.mPtNodeArrayIndexInThisArrayChain++;
if (mReadingState.mRemainingPtNodeCountInThisArray < 0
|| mReadingState.mTotalPtNodeIndexInThisArrayChain
> MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|| mReadingState.mPtNodeArrayIndexInThisArrayChain
> MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
mReadingState.mRemainingPtNodeCountInThisArray,
mReadingState.mTotalPtNodeIndexInThisArrayChain,
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
mReadingState.mPtNodeArrayIndexInThisArrayChain,
MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
return;
}
if (mReadingState.mNodeCount == 0) {
if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
// Empty node array. Try following forward link.
followForwardLink();
}

View file

@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodeArrayPos;
mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 0;
mReadingState.mNodeArrayCount = 0;
mReadingState.mTotalCodePointCountSinceInitialization = 0;
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextPtNodeArray();
@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mIsError = false;
mReadingState.mPos = ptNodePos;
mReadingState.mNodeCount = 1;
mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 1;
mReadingState.mNodeArrayCount = 1;
mReadingState.mRemainingPtNodeCountInThisArray = 1;
mReadingState.mTotalCodePointCountSinceInitialization = 0;
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
fetchPtNodeInfo();
}
@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper {
// Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
return mReadingState.mPrevTotalCodePointCount;
return mReadingState.mTotalCodePointCountSinceInitialization;
}
// Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount() const {
return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
return mReadingState.mTotalCodePointCountSinceInitialization
+ mNodeReader.getCodePointCount();
}
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE void readNextSiblingNode() {
mReadingState.mNodeCount -= 1;
mReadingState.mRemainingPtNodeCountInThisArray -= 1;
mReadingState.mPos = mNodeReader.getSiblingNodePos();
if (mReadingState.mNodeCount <= 0) {
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
// All nodes in the current node array have been read.
followForwardLink();
if (!isEnd()) {
@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper {
// Read the first child node of the current node.
AK_FORCE_INLINE void readChildNode() {
if (mNodeReader.hasChildren()) {
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
mReadingState.mTotalNodeCount = 0;
mReadingState.mNodeArrayCount = 0;
mReadingState.mTotalCodePointCountSinceInitialization +=
mNodeReader.getCodePointCount();
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPos = mNodeReader.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper {
// Read the parent node of the current node.
AK_FORCE_INLINE void readParentNode() {
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
mReadingState.mTotalNodeCount = 1;
mReadingState.mNodeArrayCount = 1;
mReadingState.mNodeCount = 1;
mReadingState.mTotalCodePointCountSinceInitialization +=
mNodeReader.getCodePointCount();
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mRemainingPtNodeCountInThisArray = 1;
mReadingState.mPos = mNodeReader.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
fetchPtNodeInfo();
} else {
mReadingState.mPos = NOT_A_DICT_POS;
@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper {
}
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
return mReadingState.mPosOfLastPtNodeArrayHead;
return mReadingState.mPosOfThisPtNodeArrayHead;
}
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper {
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
class ReadingState {
// This class encapsulates the reading state of a position in the dictionary. It points at a
// specific PtNode in the dictionary.
class PtNodeReadingState {
public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
int mPos;
// Node count of a node array.
int mNodeCount;
int mPrevTotalCodePointCount;
int mTotalNodeCount;
int mNodeArrayCount;
// Remaining node count in the current array.
int mRemainingPtNodeCountInThisArray;
int mTotalCodePointCountSinceInitialization;
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
int mTotalPtNodeIndexInThisArrayChain;
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
// PtNode arrays.
int mPtNodeArrayIndexInThisArrayChain;
int mPosOfLastForwardLinkField;
int mPosOfLastPtNodeArrayHead;
int mPosOfThisPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
// TODO: Introduce error code to track what caused the error.
bool mIsError;
ReadingState mReadingState;
PtNodeReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
DynamicPatriciaTrieNodeReader mNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
std::vector<ReadingState> mReadingStateStack;
std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray();

View file

@ -25,12 +25,12 @@
namespace latinime {
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
int nextPos = dicNode->getChildrenPos();
int nextPos = dicNode->getChildrenPtNodeArrayPos();
if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize);
@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// This retrieves code points and the probability of the word by its terminal position.
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
// it is possible to check for this with advantageous complexity. For each node, we search
// it is possible to check for this with advantageous complexity. For each PtNode array, we search
// for PtNodes with children and compare the children position with the position we look for.
// When we shoot the position we look for, it means the word we look for is in the children
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
// PtNode array with the last PtNode's children position still less than what we are searching for,
// we must descend the last PtNode's children (for example, if the word we are searching for starts
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node).
// than the position we look for, and we have to descend the z PtNode).
/* Parameters :
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int pos = getRootPosition();
int wordPos = 0;
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
// only traverse nodes that are actually a part of the terminal we are searching, so each time
// we enter this loop we are one depth level further than last time.
// The only reason we count nodes is because we want to reduce the probability of infinite
// only traverse PtNodes that are actually a part of the terminal we are searching, so each
// time we enter this loop we are one depth level further than last time.
// The only reason we count PtNodes is because we want to reduce the probability of infinite
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
// supposed to traverse, it does not hurt to count iterations.
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = true;
} else if (1 >= ptNodeCount) {
// However if we are on the LAST PtNode of this array, and we have NOT shot the
// position we should descend THIS node. So we trick the lastCandidatePtNodePos
// so that we will descend this PtNode, not the previous one.
// position we should descend THIS PtNode. So we trick the
// lastCandidatePtNodePos so that we will descend this PtNode, not the previous
// one.
lastCandidatePtNodePos = startPos;
found = true;
} else {
@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
found = false;
}
} else {
// Even if we don't have children here, we could still be on the last PtNode of /
// Even if we don't have children here, we could still be on the last PtNode of
// this array. If this is the case, we should descend the last PtNode that had
// children, and their position is already in lastCandidatePtNodePos.
found = (1 >= ptNodeCount);
@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
return 0;
}
// This function gets the position of the terminal node of the exact matching word in the
// This function gets the position of the terminal PtNode of the exact matching word in the
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition();
int wordPos = 0;

View file

@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return 0;
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(const int *const inWord,
int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;

View file

@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
return false;
}
const int point0Index = dicNode->getInputIndex(0);
return dicNode->isTerminalWordNode()
return dicNode->isTerminalDicNode()
&& traverseSession->getProximityInfoState(0)->
hasSpaceProximity(point0Index);
}
@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
if (dicNode->isCompletion(inputSize)) {
return false;
}
if (!dicNode->isTerminalWordNode()) {
if (!dicNode->isTerminalDicNode()) {
return false;
}
const int16_t pointIndex = dicNode->getInputIndex(0);