Merge "Make "node"s clear by renaming to PtNode or DicNode."
commit
be5e8f18e9
|
@ -99,7 +99,7 @@ class DicNode {
|
||||||
virtual ~DicNode() {}
|
virtual ~DicNode() {}
|
||||||
|
|
||||||
// Init for copy
|
// Init for copy
|
||||||
void initByCopy(const DicNode *dicNode) {
|
void initByCopy(const DicNode *const dicNode) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
mDicNodeProperties.init(&dicNode->mDicNodeProperties);
|
||||||
|
@ -107,25 +107,25 @@ class DicNode {
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init for root with prevWordNodePos which is used for bigram
|
// Init for root with prevWordPtNodePos which is used for bigram
|
||||||
void initAsRoot(const int rootGroupPos, const int prevWordNodePos) {
|
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = false;
|
mIsCachedForNextSuggestion = false;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||||
0 /* terminalDepth */);
|
0 /* terminalDepth */);
|
||||||
mDicNodeState.init(prevWordNodePos);
|
mDicNodeState.init(prevWordPtNodePos);
|
||||||
PROF_NODE_RESET(mProfiler);
|
PROF_NODE_RESET(mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init for root with previous word
|
// Init for root with previous word
|
||||||
void initAsRootWithPreviousWord(DicNode *dicNode, const int rootGroupPos) {
|
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
NOT_A_DICT_POS /* pos */, rootGroupPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
|
||||||
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
|
||||||
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
|
||||||
0 /* terminalDepth */);
|
0 /* terminalDepth */);
|
||||||
|
@ -138,7 +138,7 @@ class DicNode {
|
||||||
mDicNodeState.mDicNodeStatePrevWord.init(
|
mDicNodeState.mDicNodeStatePrevWord.init(
|
||||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1,
|
||||||
dicNode->mDicNodeProperties.getProbability(),
|
dicNode->mDicNodeProperties.getProbability(),
|
||||||
dicNode->mDicNodeProperties.getPos(),
|
dicNode->mDicNodeProperties.getPtNodePos(),
|
||||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.mPrevWord,
|
||||||
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
|
dicNode->mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(),
|
||||||
dicNode->getOutputWordBuf(),
|
dicNode->getOutputWordBuf(),
|
||||||
|
@ -148,26 +148,27 @@ class DicNode {
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsPassingChild(DicNode *parentNode) {
|
void initAsPassingChild(DicNode *parentDicNode) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = parentNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
|
||||||
const int c = parentNode->getNodeTypedCodePoint();
|
const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
|
||||||
mDicNodeProperties.init(&parentNode->mDicNodeProperties, c);
|
mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
|
||||||
mDicNodeState.init(&parentNode->mDicNodeState);
|
mDicNodeState.init(&parentDicNode->mDicNodeState);
|
||||||
PROF_NODE_COPY(&parentNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
void initAsChild(const DicNode *const dicNode, const int ptNodePos,
|
||||||
const int probability, const bool isTerminal, const bool hasChildren,
|
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
|
||||||
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||||
const int *const mergedNodeCodePoints) {
|
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||||
mDicNodeProperties.init(pos, childrenPos, mergedNodeCodePoints[0], probability,
|
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
|
||||||
isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, newLeavingDepth);
|
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
|
||||||
|
newLeavingDepth);
|
||||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
@ -234,7 +235,7 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isFirstWord() const {
|
bool isFirstWord() const {
|
||||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos() == NOT_A_DICT_POS;
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isCompletion(const int inputSize) const {
|
bool isCompletion(const int inputSize) const {
|
||||||
|
@ -246,29 +247,30 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get bigram probability in DicNodeUtils
|
// Used to get bigram probability in DicNodeUtils
|
||||||
int getPos() const {
|
int getPtNodePos() const {
|
||||||
return mDicNodeProperties.getPos();
|
return mDicNodeProperties.getPtNodePos();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get bigram probability in DicNodeUtils
|
// Used to get bigram probability in DicNodeUtils
|
||||||
int getPrevWordPos() const {
|
int getPrevWordTerminalPtNodePos() const {
|
||||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used in DicNodeUtils
|
// Used in DicNodeUtils
|
||||||
int getChildrenPos() const {
|
int getChildrenPtNodeArrayPos() const {
|
||||||
return mDicNodeProperties.getChildrenPos();
|
return mDicNodeProperties.getChildrenPtNodeArrayPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
int getProbability() const {
|
int getProbability() const {
|
||||||
return mDicNodeProperties.getProbability();
|
return mDicNodeProperties.getProbability();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isTerminalWordNode() const {
|
AK_FORCE_INLINE bool isTerminalDicNode() const {
|
||||||
const bool isTerminalNodes = mDicNodeProperties.isTerminal();
|
const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
|
||||||
const int currentNodeDepth = getNodeCodePointCount();
|
const int currentDicNodeDepth = getNodeCodePointCount();
|
||||||
const int terminalNodeDepth = mDicNodeProperties.getLeavingDepth();
|
const int terminalDicNodeDepth = mDicNodeProperties.getLeavingDepth();
|
||||||
return isTerminalNodes && currentNodeDepth > 0 && currentNodeDepth == terminalNodeDepth;
|
return isTerminalPtNode && currentDicNodeDepth > 0
|
||||||
|
&& currentDicNodeDepth == terminalDicNodeDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool shouldBeFilteredBySafetyNetForBigram() const {
|
bool shouldBeFilteredBySafetyNetForBigram() const {
|
||||||
|
@ -374,8 +376,8 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to commit input partially
|
// Used to commit input partially
|
||||||
int getPrevWordNodePos() const {
|
int getPrevWordPtNodePos() const {
|
||||||
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordNodePos();
|
return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||||
|
@ -410,7 +412,7 @@ class DicNode {
|
||||||
// TODO: Remove once touch path is merged into ProximityInfoState
|
// TODO: Remove once touch path is merged into ProximityInfoState
|
||||||
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
|
// Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
|
||||||
int getNodeCodePoint() const {
|
int getNodeCodePoint() const {
|
||||||
const int codePoint = mDicNodeProperties.getNodeCodePoint();
|
const int codePoint = mDicNodeProperties.getDicNodeCodePoint();
|
||||||
const DigraphUtils::DigraphCodePointIndex digraphIndex =
|
const DigraphUtils::DigraphCodePointIndex digraphIndex =
|
||||||
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
|
mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
|
||||||
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
|
if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
|
||||||
|
@ -423,8 +425,8 @@ class DicNode {
|
||||||
// Utils for cost calculation //
|
// Utils for cost calculation //
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
|
AK_FORCE_INLINE bool isSameNodeCodePoint(const DicNode *const dicNode) const {
|
||||||
return mDicNodeProperties.getNodeCodePoint()
|
return mDicNodeProperties.getDicNodeCodePoint()
|
||||||
== dicNode->mDicNodeProperties.getNodeCodePoint();
|
== dicNode->mDicNodeProperties.getDicNodeCodePoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove
|
// TODO: remove
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/core/dictionary/multi_bigram_map.h"
|
#include "suggest/core/dictionary/multi_bigram_map.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "utils/char_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -32,19 +31,20 @@ namespace latinime {
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initAsRoot(
|
/* static */ void DicNodeUtils::initAsRoot(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const int prevWordNodePos, DicNode *const newRootNode) {
|
const int prevWordPtNodePos, DicNode *const newRootDicNode) {
|
||||||
newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
|
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode) {
|
||||||
newRootNode->initAsRootWithPreviousWord(
|
newRootDicNode->initAsRootWithPreviousWord(
|
||||||
prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
|
prevWordLastDicNode, dictionaryStructurePolicy->getRootPosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
/* static */ void DicNodeUtils::initByCopy(const DicNode *const srcDicNode,
|
||||||
destNode->initByCopy(srcNode);
|
DicNode *const destDicNode) {
|
||||||
|
destDicNode->initByCopy(srcDicNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
|
@ -52,14 +52,14 @@ namespace latinime {
|
||||||
///////////////////////////////////
|
///////////////////////////////////
|
||||||
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
/* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNodeVector *childDicNodes) {
|
DicNodeVector *const childDicNodes) {
|
||||||
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
if (dicNode->isTotalInputSizeExceedingLimit()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!dicNode->isLeavingNode()) {
|
if (!dicNode->isLeavingNode()) {
|
||||||
childDicNodes->pushPassingChild(dicNode);
|
childDicNodes->pushPassingChild(dicNode);
|
||||||
} else {
|
} else {
|
||||||
dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
|
dictionaryStructurePolicy->createAndGetAllChildDicNodes(dicNode, childDicNodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,11 +71,11 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
/* static */ float DicNodeUtils::getBigramNodeImprobability(
|
/* static */ float DicNodeUtils::getBigramNodeImprobability(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const node, MultiBigramMap *multiBigramMap) {
|
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||||
if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
|
if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
|
||||||
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
|
||||||
}
|
}
|
||||||
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
|
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode,
|
||||||
multiBigramMap);
|
multiBigramMap);
|
||||||
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
|
||||||
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
|
const float cost = static_cast<float>(MAX_PROBABILITY - probability)
|
||||||
|
@ -85,19 +85,19 @@ namespace latinime {
|
||||||
|
|
||||||
/* static */ int DicNodeUtils::getBigramNodeProbability(
|
/* static */ int DicNodeUtils::getBigramNodeProbability(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const node, MultiBigramMap *multiBigramMap) {
|
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||||
const int unigramProbability = node->getProbability();
|
const int unigramProbability = dicNode->getProbability();
|
||||||
const int wordPos = node->getPos();
|
const int ptNodePos = dicNode->getPtNodePos();
|
||||||
const int prevWordPos = node->getPrevWordPos();
|
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
|
||||||
if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
|
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
|
||||||
// Note: Normally wordPos comes from the dictionary and should never equal
|
// Note: Normally ptNodePos comes from the dictionary and should never equal
|
||||||
// NOT_A_VALID_WORD_POS.
|
// NOT_A_DICT_POS.
|
||||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
||||||
NOT_A_PROBABILITY);
|
NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
if (multiBigramMap) {
|
if (multiBigramMap) {
|
||||||
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
|
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
|
||||||
wordPos, unigramProbability);
|
prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
|
||||||
}
|
}
|
||||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
||||||
NOT_A_PROBABILITY);
|
NOT_A_PROBABILITY);
|
||||||
|
@ -109,7 +109,7 @@ namespace latinime {
|
||||||
|
|
||||||
// TODO: Move to char_utils?
|
// TODO: Move to char_utils?
|
||||||
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
|
/* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
|
||||||
const int *const src1, const int16_t length1, int *dest) {
|
const int *const src1, const int16_t length1, int *const dest) {
|
||||||
int actualLength0 = 0;
|
int actualLength0 = 0;
|
||||||
for (int i = 0; i < length0; ++i) {
|
for (int i = 0; i < length0; ++i) {
|
||||||
if (src0[i] == 0) {
|
if (src0[i] == 0) {
|
||||||
|
|
|
@ -31,20 +31,20 @@ class MultiBigramMap;
|
||||||
class DicNodeUtils {
|
class DicNodeUtils {
|
||||||
public:
|
public:
|
||||||
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
|
static int appendTwoWords(const int *src0, const int16_t length0, const int *src1,
|
||||||
const int16_t length1, int *dest);
|
const int16_t length1, int *const dest);
|
||||||
static void initAsRoot(
|
static void initAsRoot(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const int prevWordNodePos, DicNode *newRootNode);
|
const int prevWordPtNodePos, DicNode *const newRootDicNode);
|
||||||
static void initAsRootWithPreviousWord(
|
static void initAsRootWithPreviousWord(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNode *prevWordLastNode, DicNode *newRootNode);
|
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
|
||||||
static void initByCopy(DicNode *srcNode, DicNode *destNode);
|
static void initByCopy(const DicNode *const srcDicNode, DicNode *const destDicNode);
|
||||||
static void getAllChildDicNodes(DicNode *dicNode,
|
static void getAllChildDicNodes(DicNode *dicNode,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
DicNodeVector *childDicNodes);
|
DicNodeVector *childDicNodes);
|
||||||
static float getBigramNodeImprobability(
|
static float getBigramNodeImprobability(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const node, MultiBigramMap *const multiBigramMap);
|
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
|
||||||
|
@ -53,7 +53,7 @@ class DicNodeUtils {
|
||||||
|
|
||||||
static int getBigramNodeProbability(
|
static int getBigramNodeProbability(
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const node, MultiBigramMap *multiBigramMap);
|
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DIC_NODE_UTILS_H
|
#endif // LATINIME_DIC_NODE_UTILS_H
|
||||||
|
|
|
@ -62,14 +62,14 @@ class DicNodeVector {
|
||||||
mDicNodes.back().initAsPassingChild(dicNode);
|
mDicNodes.back().initAsPassingChild(dicNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushLeavingChild(const DicNode *const dicNode, const int pos, const int childrenPos,
|
void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
|
||||||
const int probability, const bool isTerminal, const bool hasChildren,
|
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
|
||||||
const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount,
|
const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||||
const int *const mergedNodeCodePoints) {
|
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
|
||||||
ASSERT(!mLock);
|
ASSERT(!mLock);
|
||||||
mDicNodes.push_back(mEmptyNode);
|
mDicNodes.push_back(mEmptyNode);
|
||||||
mDicNodes.back().initAsChild(dicNode, pos, childrenPos, probability, isTerminal,
|
mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
|
||||||
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,15 +24,14 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Node for traversing the lexicon trie.
|
* PtNode information related to the DicNode from the lexicon trie.
|
||||||
*/
|
*/
|
||||||
// TODO: Introduce a dictionary node class which has attribute members required to understand the
|
|
||||||
// dictionary structure.
|
|
||||||
class DicNodeProperties {
|
class DicNodeProperties {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeProperties()
|
AK_FORCE_INLINE DicNodeProperties()
|
||||||
: mPos(0), mChildrenPos(0), mProbability(0), mNodeCodePoint(0), mIsTerminal(false),
|
: mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
|
||||||
mHasChildren(false), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
|
||||||
|
mDepth(0), mLeavingDepth(0) {}
|
||||||
|
|
||||||
virtual ~DicNodeProperties() {}
|
virtual ~DicNodeProperties() {}
|
||||||
|
|
||||||
|
@ -40,57 +39,57 @@ class DicNodeProperties {
|
||||||
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
||||||
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||||
const uint16_t depth, const uint16_t leavingDepth) {
|
const uint16_t depth, const uint16_t leavingDepth) {
|
||||||
mPos = pos;
|
mPtNodePos = pos;
|
||||||
mChildrenPos = childrenPos;
|
mChildrenPtNodeArrayPos = childrenPos;
|
||||||
mNodeCodePoint = nodeCodePoint;
|
mDicNodeCodePoint = nodeCodePoint;
|
||||||
mProbability = probability;
|
mProbability = probability;
|
||||||
mIsTerminal = isTerminal;
|
mIsTerminal = isTerminal;
|
||||||
mHasChildren = hasChildren;
|
mHasChildrenPtNodes = hasChildren;
|
||||||
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
||||||
mDepth = depth;
|
mDepth = depth;
|
||||||
mLeavingDepth = leavingDepth;
|
mLeavingDepth = leavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init for copy
|
// Init for copy
|
||||||
void init(const DicNodeProperties *const nodeProp) {
|
void init(const DicNodeProperties *const dicNodeProp) {
|
||||||
mPos = nodeProp->mPos;
|
mPtNodePos = dicNodeProp->mPtNodePos;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||||
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = dicNodeProp->mProbability;
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = dicNodeProp->mIsTerminal;
|
||||||
mHasChildren = nodeProp->mHasChildren;
|
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
|
||||||
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
|
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||||
mDepth = nodeProp->mDepth;
|
mDepth = dicNodeProp->mDepth;
|
||||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init as passing child
|
// Init as passing child
|
||||||
void init(const DicNodeProperties *const nodeProp, const int codePoint) {
|
void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
|
||||||
mPos = nodeProp->mPos;
|
mPtNodePos = dicNodeProp->mPtNodePos;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||||
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = dicNodeProp->mProbability;
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = dicNodeProp->mIsTerminal;
|
||||||
mHasChildren = nodeProp->mHasChildren;
|
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
|
||||||
mIsBlacklistedOrNotAWord = nodeProp->mIsBlacklistedOrNotAWord;
|
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||||
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getPos() const {
|
int getPtNodePos() const {
|
||||||
return mPos;
|
return mPtNodePos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getChildrenPos() const {
|
int getChildrenPtNodeArrayPos() const {
|
||||||
return mChildrenPos;
|
return mChildrenPtNodeArrayPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getProbability() const {
|
int getProbability() const {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getNodeCodePoint() const {
|
int getDicNodeCodePoint() const {
|
||||||
return mNodeCodePoint;
|
return mDicNodeCodePoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t getDepth() const {
|
uint16_t getDepth() const {
|
||||||
|
@ -107,7 +106,7 @@ class DicNodeProperties {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasChildren() const {
|
bool hasChildren() const {
|
||||||
return mHasChildren || mDepth != mLeavingDepth;
|
return mHasChildrenPtNodes || mDepth != mLeavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isBlacklistedOrNotAWord() const {
|
bool isBlacklistedOrNotAWord() const {
|
||||||
|
@ -118,12 +117,12 @@ class DicNodeProperties {
|
||||||
// Caution!!!
|
// Caution!!!
|
||||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
// for this class
|
// for this class
|
||||||
int mPos;
|
int mPtNodePos;
|
||||||
int mChildrenPos;
|
int mChildrenPtNodeArrayPos;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mNodeCodePoint;
|
int mDicNodeCodePoint;
|
||||||
bool mIsTerminal;
|
bool mIsTerminal;
|
||||||
bool mHasChildren;
|
bool mHasChildrenPtNodes;
|
||||||
bool mIsBlacklistedOrNotAWord;
|
bool mIsBlacklistedOrNotAWord;
|
||||||
uint16_t mDepth;
|
uint16_t mDepth;
|
||||||
uint16_t mLeavingDepth;
|
uint16_t mLeavingDepth;
|
||||||
|
|
|
@ -30,7 +30,7 @@ class DicNodeStatePrevWord {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeStatePrevWord()
|
AK_FORCE_INLINE DicNodeStatePrevWord()
|
||||||
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
: mPrevWordCount(0), mPrevWordLength(0), mPrevWordStart(0), mPrevWordProbability(0),
|
||||||
mPrevWordNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
|
mPrevWordPtNodePos(NOT_A_DICT_POS), mSecondWordFirstInputIndex(NOT_AN_INDEX) {
|
||||||
memset(mPrevWord, 0, sizeof(mPrevWord));
|
memset(mPrevWord, 0, sizeof(mPrevWord));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ class DicNodeStatePrevWord {
|
||||||
mPrevWordCount = 0;
|
mPrevWordCount = 0;
|
||||||
mPrevWordStart = 0;
|
mPrevWordStart = 0;
|
||||||
mPrevWordProbability = -1;
|
mPrevWordProbability = -1;
|
||||||
mPrevWordNodePos = NOT_A_DICT_POS;
|
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ class DicNodeStatePrevWord {
|
||||||
mPrevWordCount = 0;
|
mPrevWordCount = 0;
|
||||||
mPrevWordStart = 0;
|
mPrevWordStart = 0;
|
||||||
mPrevWordProbability = -1;
|
mPrevWordProbability = -1;
|
||||||
mPrevWordNodePos = prevWordNodePos;
|
mPrevWordPtNodePos = prevWordNodePos;
|
||||||
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
mSecondWordFirstInputIndex = NOT_AN_INDEX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ class DicNodeStatePrevWord {
|
||||||
mPrevWordCount = prevWord->mPrevWordCount;
|
mPrevWordCount = prevWord->mPrevWordCount;
|
||||||
mPrevWordStart = prevWord->mPrevWordStart;
|
mPrevWordStart = prevWord->mPrevWordStart;
|
||||||
mPrevWordProbability = prevWord->mPrevWordProbability;
|
mPrevWordProbability = prevWord->mPrevWordProbability;
|
||||||
mPrevWordNodePos = prevWord->mPrevWordNodePos;
|
mPrevWordPtNodePos = prevWord->mPrevWordPtNodePos;
|
||||||
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
|
mSecondWordFirstInputIndex = prevWord->mSecondWordFirstInputIndex;
|
||||||
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
memcpy(mPrevWord, prevWord->mPrevWord, prevWord->mPrevWordLength * sizeof(mPrevWord[0]));
|
||||||
}
|
}
|
||||||
|
@ -71,7 +71,7 @@ class DicNodeStatePrevWord {
|
||||||
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
|
const int prevWordSecondWordFirstInputIndex, const int lastInputIndex) {
|
||||||
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
|
mPrevWordCount = min(prevWordCount, static_cast<int16_t>(MAX_RESULTS));
|
||||||
mPrevWordProbability = prevWordProbability;
|
mPrevWordProbability = prevWordProbability;
|
||||||
mPrevWordNodePos = prevWordNodePos;
|
mPrevWordPtNodePos = prevWordNodePos;
|
||||||
int twoWordsLen =
|
int twoWordsLen =
|
||||||
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
DicNodeUtils::appendTwoWords(src0, length0, src1, length1, mPrevWord);
|
||||||
if (twoWordsLen >= MAX_WORD_LENGTH) {
|
if (twoWordsLen >= MAX_WORD_LENGTH) {
|
||||||
|
@ -116,8 +116,8 @@ class DicNodeStatePrevWord {
|
||||||
return mPrevWordStart;
|
return mPrevWordStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getPrevWordNodePos() const {
|
int getPrevWordPtNodePos() const {
|
||||||
return mPrevWordNodePos;
|
return mPrevWordPtNodePos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getPrevWordCodePointAt(const int id) const {
|
int getPrevWordCodePointAt(const int id) const {
|
||||||
|
@ -147,7 +147,7 @@ class DicNodeStatePrevWord {
|
||||||
int16_t mPrevWordLength;
|
int16_t mPrevWordLength;
|
||||||
int16_t mPrevWordStart;
|
int16_t mPrevWordStart;
|
||||||
int16_t mPrevWordProbability;
|
int16_t mPrevWordProbability;
|
||||||
int mPrevWordNodePos;
|
int mPrevWordPtNodePos;
|
||||||
int mSecondWordFirstInputIndex;
|
int mSecondWordFirstInputIndex;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -144,7 +144,7 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
||||||
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||||
const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
||||||
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
int pos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(prevWord, prevWordLength,
|
||||||
forceLowerCaseSearch);
|
forceLowerCaseSearch);
|
||||||
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
||||||
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
||||||
|
@ -155,7 +155,7 @@ int BigramDictionary::getBigramProbability(const int *word0, int length0, const
|
||||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
||||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
|
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,7 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
int pos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(word, length,
|
int pos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(word, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == pos) {
|
if (NOT_A_DICT_POS == pos) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
|
|
@ -37,14 +37,14 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
virtual int getRootPosition() const = 0;
|
virtual int getRootPosition() const = 0;
|
||||||
|
|
||||||
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
|
virtual void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const = 0;
|
DicNodeVector *const childDicNodes) const = 0;
|
||||||
|
|
||||||
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const = 0;
|
int *const outUnigramProbability) const = 0;
|
||||||
|
|
||||||
virtual int getTerminalNodePositionOfWord(const int *const inWord,
|
virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const = 0;
|
const int length, const bool forceLowerCaseSearch) const = 0;
|
||||||
|
|
||||||
virtual int getProbability(const int unigramProbability,
|
virtual int getProbability(const int unigramProbability,
|
||||||
|
|
|
@ -35,16 +35,16 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
||||||
->getMultiWordCostMultiplier();
|
->getMultiWordCostMultiplier();
|
||||||
mSuggestOptions = suggestOptions;
|
mSuggestOptions = suggestOptions;
|
||||||
if (!prevWord) {
|
if (!prevWord) {
|
||||||
mPrevWordPos = NOT_A_DICT_POS;
|
mPrevWordPtNodePos = NOT_A_DICT_POS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||||
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||||
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
||||||
if (mPrevWordPos == NOT_A_DICT_POS) {
|
if (mPrevWordPtNodePos == NOT_A_DICT_POS) {
|
||||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
// auto-capitalized words like "The [current_word]".
|
// auto-capitalized words like "The [current_word]".
|
||||||
mPrevWordPos = getDictionaryStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPtNodePos = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||||
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,7 +59,7 @@ class DicTraverseSession {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
|
AK_FORCE_INLINE DicTraverseSession(JNIEnv *env, jstring localeStr, bool usesLargeCache)
|
||||||
: mPrevWordPos(NOT_A_DICT_POS), mProximityInfo(0),
|
: mPrevWordPtNodePos(NOT_A_DICT_POS), mProximityInfo(0),
|
||||||
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
|
mDictionary(0), mSuggestOptions(0), mDicNodesCache(usesLargeCache),
|
||||||
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
|
mMultiBigramMap(), mInputSize(0), mPartiallyCommited(false), mMaxPointerCount(1),
|
||||||
mMultiWordCostMultiplier(1.0f) {
|
mMultiWordCostMultiplier(1.0f) {
|
||||||
|
@ -86,11 +86,9 @@ class DicTraverseSession {
|
||||||
//--------------------
|
//--------------------
|
||||||
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
|
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
|
||||||
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
|
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
|
||||||
int getPrevWordPos() const { return mPrevWordPos; }
|
int getPrevWordPtNodePos() const { return mPrevWordPtNodePos; }
|
||||||
// TODO: REMOVE
|
// TODO: REMOVE
|
||||||
void setPrevWordPos(int pos) { mPrevWordPos = pos; }
|
void setPrevWordPtNodePos(const int ptNodePos) { mPrevWordPtNodePos = ptNodePos; }
|
||||||
// TODO: Use proper parameter when changed
|
|
||||||
int getDicRootPos() const { return 0; }
|
|
||||||
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
|
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
|
||||||
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
|
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
|
||||||
const ProximityInfoState *getProximityInfoState(int id) const {
|
const ProximityInfoState *getProximityInfoState(int id) const {
|
||||||
|
@ -119,26 +117,13 @@ class DicTraverseSession {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void getSearchKeys(const DicNode *node, std::vector<int> *const outputSearchKeyVector) const {
|
ProximityType getProximityTypeG(const DicNode *const dicNode, const int childCodePoint) const {
|
||||||
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
|
|
||||||
if (!mProximityInfoStates[i].isUsed()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int pointerId = node->getInputIndex(i);
|
|
||||||
const std::vector<int> *const searchKeyVector =
|
|
||||||
mProximityInfoStates[i].getSearchKeyVector(pointerId);
|
|
||||||
outputSearchKeyVector->insert(outputSearchKeyVector->end(), searchKeyVector->begin(),
|
|
||||||
searchKeyVector->end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ProximityType getProximityTypeG(const DicNode *const node, const int childCodePoint) const {
|
|
||||||
ProximityType proximityType = UNRELATED_CHAR;
|
ProximityType proximityType = UNRELATED_CHAR;
|
||||||
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
|
for (int i = 0; i < MAX_POINTER_COUNT_G; ++i) {
|
||||||
if (!mProximityInfoStates[i].isUsed()) {
|
if (!mProximityInfoStates[i].isUsed()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const int pointerId = node->getInputIndex(i);
|
const int pointerId = dicNode->getInputIndex(i);
|
||||||
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
|
proximityType = mProximityInfoStates[i].getProximityTypeG(pointerId, childCodePoint);
|
||||||
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
|
ASSERT(proximityType == UNRELATED_CHAR || proximityType == MATCH_CHAR);
|
||||||
// TODO: Make this more generic
|
// TODO: Make this more generic
|
||||||
|
@ -192,7 +177,7 @@ class DicTraverseSession {
|
||||||
const int *const inputYs, const int *const times, const int *const pointerIds,
|
const int *const inputYs, const int *const times, const int *const pointerIds,
|
||||||
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
|
const int inputSize, const float maxSpatialDistance, const int maxPointerCount);
|
||||||
|
|
||||||
int mPrevWordPos;
|
int mPrevWordPtNodePos;
|
||||||
const ProximityInfo *mProximityInfo;
|
const ProximityInfo *mProximityInfo;
|
||||||
const Dictionary *mDictionary;
|
const Dictionary *mDictionary;
|
||||||
const SuggestOptions *mSuggestOptions;
|
const SuggestOptions *mSuggestOptions;
|
||||||
|
|
|
@ -98,7 +98,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
|
||||||
// Continue suggestion after partial commit.
|
// Continue suggestion after partial commit.
|
||||||
DicNode *topDicNode =
|
DicNode *topDicNode =
|
||||||
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
|
traverseSession->getDicTraverseCache()->setCommitPoint(commitPoint);
|
||||||
traverseSession->setPrevWordPos(topDicNode->getPrevWordNodePos());
|
traverseSession->setPrevWordPtNodePos(topDicNode->getPrevWordPtNodePos());
|
||||||
traverseSession->getDicTraverseCache()->continueSearch();
|
traverseSession->getDicTraverseCache()->continueSearch();
|
||||||
traverseSession->setPartiallyCommited();
|
traverseSession->setPartiallyCommited();
|
||||||
}
|
}
|
||||||
|
@ -109,7 +109,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
|
||||||
// Create a new dic node here
|
// Create a new dic node here
|
||||||
DicNode rootNode;
|
DicNode rootNode;
|
||||||
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
|
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
|
||||||
traverseSession->getPrevWordPos(), &rootNode);
|
traverseSession->getPrevWordPtNodePos(), &rootNode);
|
||||||
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
|
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -231,7 +231,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
BinaryDictionaryShortcutIterator shortcutIt(
|
BinaryDictionaryShortcutIterator shortcutIt(
|
||||||
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
||||||
traverseSession->getDictionaryStructurePolicy()
|
traverseSession->getDictionaryStructurePolicy()
|
||||||
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
|
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
|
||||||
// Shortcut is not supported for multiple words suggestions.
|
// Shortcut is not supported for multiple words suggestions.
|
||||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||||
|
@ -421,15 +421,15 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case UNRELATED_CHAR:
|
case UNRELATED_CHAR:
|
||||||
// Just drop this node and do nothing.
|
// Just drop this dicNode and do nothing.
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// Just drop this node and do nothing.
|
// Just drop this dicNode and do nothing.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Push the node for look-ahead correction
|
// Push the dicNode for look-ahead correction
|
||||||
if (allowsErrorCorrections && canDoLookAheadCorrection) {
|
if (allowsErrorCorrections && canDoLookAheadCorrection) {
|
||||||
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
|
traverseSession->getDicTraverseCache()->copyPushNextActive(&dicNode);
|
||||||
}
|
}
|
||||||
|
@ -442,7 +442,7 @@ void Suggest::processTerminalDicNode(
|
||||||
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
|
if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!dicNode->isTerminalWordNode()) {
|
if (!dicNode->isTerminalDicNode()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
|
if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
|
||||||
|
@ -463,7 +463,7 @@ void Suggest::processTerminalDicNode(
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
|
* Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
|
||||||
* (by the space omission error correction) search path if input dicNode is on a terminal node.
|
* (by the space omission error correction) search path if input dicNode is on a terminal.
|
||||||
*/
|
*/
|
||||||
void Suggest::processExpandedDicNode(
|
void Suggest::processExpandedDicNode(
|
||||||
DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
||||||
|
@ -505,7 +505,7 @@ void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
|
||||||
processExpandedDicNode(traverseSession, childDicNode);
|
processExpandedDicNode(traverseSession, childDicNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the node codepoint as a digraph. This means that composite glyphs like the German
|
// Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
|
||||||
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
|
// u-umlaut is expanded to the transliteration "ue". Note that this happens in parallel with
|
||||||
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
|
// the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
|
||||||
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
|
void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
|
||||||
|
@ -518,7 +518,7 @@ void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
|
||||||
/**
|
/**
|
||||||
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
|
* Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
|
||||||
* matches for all possible next letters. Note that just skipping the current letter without any
|
* matches for all possible next letters. Note that just skipping the current letter without any
|
||||||
* other conditions tends to flood the search dic nodes cache with omission nodes. Instead, check
|
* other conditions tends to flood the search DicNodes cache with omission DicNodes. Instead, check
|
||||||
* the possible *next* letters after the omission to better limit search to plausible omissions.
|
* the possible *next* letters after the omission to better limit search to plausible omissions.
|
||||||
* Note that apostrophes are handled as omissions.
|
* Note that apostrophes are handled as omissions.
|
||||||
*/
|
*/
|
||||||
|
@ -605,7 +605,7 @@ void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Weight child node by aligning it to the key
|
* Weight child dicNode by aligning it to the key
|
||||||
*/
|
*/
|
||||||
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const {
|
||||||
const int inputSize = traverseSession->getInputSize();
|
const int inputSize = traverseSession->getInputSize();
|
||||||
|
|
|
@ -45,14 +45,14 @@ const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024
|
||||||
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
|
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
||||||
|
@ -107,7 +107,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
return codePointCount;
|
return codePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
int searchCodePoints[length];
|
int searchCodePoints[length];
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
|
@ -246,12 +246,12 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -280,12 +280,12 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word1Pos = getTerminalNodePositionOfWord(word1, length1,
|
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -50,14 +50,14 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const int length, const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace latinime {
|
||||||
|
|
||||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||||
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
const int DynamicPatriciaTrieReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
||||||
|
|
||||||
// Visits all PtNodes in post-order depth first manner.
|
// Visits all PtNodes in post-order depth first manner.
|
||||||
|
@ -170,35 +170,41 @@ void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
|
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
mReadingState.mRemainingPtNodeCountInThisArray =
|
||||||
dictBuf, &mReadingState.mPos);
|
PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
||||||
|
&mReadingState.mPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mReadingState.mPos += mBuffer->getOriginalBufferSize();
|
mReadingState.mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
// Count up nodes and node arrays to avoid infinite loop.
|
// Count up nodes and node arrays to avoid infinite loop.
|
||||||
mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain +=
|
||||||
mReadingState.mNodeArrayCount++;
|
mReadingState.mRemainingPtNodeCountInThisArray;
|
||||||
if (mReadingState.mNodeCount < 0
|
mReadingState.mPtNodeArrayIndexInThisArrayChain++;
|
||||||
|| mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
if (mReadingState.mRemainingPtNodeCountInThisArray < 0
|
||||||
|| mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
|| mReadingState.mTotalPtNodeIndexInThisArrayChain
|
||||||
|
> MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||||
|
|| mReadingState.mPtNodeArrayIndexInThisArrayChain
|
||||||
|
> MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||||
// Invalid dictionary.
|
// Invalid dictionary.
|
||||||
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
||||||
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
||||||
mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
|
mReadingState.mRemainingPtNodeCountInThisArray,
|
||||||
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
|
mReadingState.mTotalPtNodeIndexInThisArrayChain,
|
||||||
MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
||||||
|
mReadingState.mPtNodeArrayIndexInThisArrayChain,
|
||||||
|
MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
mIsError = true;
|
mIsError = true;
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (mReadingState.mNodeCount == 0) {
|
if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
|
||||||
// Empty node array. Try following forward link.
|
// Empty node array. Try following forward link.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,9 +84,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mReadingState.mPos = ptNodeArrayPos;
|
mReadingState.mPos = ptNodeArrayPos;
|
||||||
mReadingState.mPrevTotalCodePointCount = 0;
|
mReadingState.mTotalCodePointCountSinceInitialization = 0;
|
||||||
mReadingState.mTotalNodeCount = 0;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
||||||
mReadingState.mNodeArrayCount = 0;
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
nextPtNodeArray();
|
nextPtNodeArray();
|
||||||
|
@ -103,12 +103,12 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mReadingState.mPos = ptNodePos;
|
mReadingState.mPos = ptNodePos;
|
||||||
mReadingState.mNodeCount = 1;
|
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
||||||
mReadingState.mPrevTotalCodePointCount = 0;
|
mReadingState.mTotalCodePointCountSinceInitialization = 0;
|
||||||
mReadingState.mTotalNodeCount = 1;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
||||||
mReadingState.mNodeArrayCount = 1;
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
fetchPtNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
|
@ -128,12 +128,13 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
|
|
||||||
// Return code point count exclude the last read node's code points.
|
// Return code point count exclude the last read node's code points.
|
||||||
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
||||||
return mReadingState.mPrevTotalCodePointCount;
|
return mReadingState.mTotalCodePointCountSinceInitialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count include the last read node's code points.
|
// Return code point count include the last read node's code points.
|
||||||
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
||||||
return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
return mReadingState.mTotalCodePointCountSinceInitialization
|
||||||
|
+ mNodeReader.getCodePointCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
||||||
|
@ -149,9 +150,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void readNextSiblingNode() {
|
AK_FORCE_INLINE void readNextSiblingNode() {
|
||||||
mReadingState.mNodeCount -= 1;
|
mReadingState.mRemainingPtNodeCountInThisArray -= 1;
|
||||||
mReadingState.mPos = mNodeReader.getSiblingNodePos();
|
mReadingState.mPos = mNodeReader.getSiblingNodePos();
|
||||||
if (mReadingState.mNodeCount <= 0) {
|
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
|
||||||
// All nodes in the current node array have been read.
|
// All nodes in the current node array have been read.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
|
@ -165,9 +166,10 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
// Read the first child node of the current node.
|
// Read the first child node of the current node.
|
||||||
AK_FORCE_INLINE void readChildNode() {
|
AK_FORCE_INLINE void readChildNode() {
|
||||||
if (mNodeReader.hasChildren()) {
|
if (mNodeReader.hasChildren()) {
|
||||||
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||||
mReadingState.mTotalNodeCount = 0;
|
mNodeReader.getCodePointCount();
|
||||||
mReadingState.mNodeArrayCount = 0;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
||||||
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
||||||
mReadingState.mPos = mNodeReader.getChildrenPos();
|
mReadingState.mPos = mNodeReader.getChildrenPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
// Read children node array.
|
// Read children node array.
|
||||||
|
@ -183,13 +185,14 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
// Read the parent node of the current node.
|
// Read the parent node of the current node.
|
||||||
AK_FORCE_INLINE void readParentNode() {
|
AK_FORCE_INLINE void readParentNode() {
|
||||||
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||||
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||||
mReadingState.mTotalNodeCount = 1;
|
mNodeReader.getCodePointCount();
|
||||||
mReadingState.mNodeArrayCount = 1;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
||||||
mReadingState.mNodeCount = 1;
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
||||||
|
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
||||||
mReadingState.mPos = mNodeReader.getParentPos();
|
mReadingState.mPos = mNodeReader.getParentPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
fetchPtNodeInfo();
|
fetchPtNodeInfo();
|
||||||
} else {
|
} else {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
|
@ -201,7 +204,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
|
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
|
||||||
return mReadingState.mPosOfLastPtNodeArrayHead;
|
return mReadingState.mPosOfThisPtNodeArrayHead;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
||||||
|
@ -218,35 +221,41 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||||
|
|
||||||
class ReadingState {
|
// This class encapsulates the reading state of a position in the dictionary. It points at a
|
||||||
|
// specific PtNode in the dictionary.
|
||||||
|
class PtNodeReadingState {
|
||||||
public:
|
public:
|
||||||
// Note that copy constructor and assignment operator are used for this class to use
|
// Note that copy constructor and assignment operator are used for this class to use
|
||||||
// std::vector.
|
// std::vector.
|
||||||
ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
|
||||||
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
|
||||||
mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
|
mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
||||||
|
mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
int mPos;
|
int mPos;
|
||||||
// Node count of a node array.
|
// Remaining node count in the current array.
|
||||||
int mNodeCount;
|
int mRemainingPtNodeCountInThisArray;
|
||||||
int mPrevTotalCodePointCount;
|
int mTotalCodePointCountSinceInitialization;
|
||||||
int mTotalNodeCount;
|
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
|
||||||
int mNodeArrayCount;
|
int mTotalPtNodeIndexInThisArrayChain;
|
||||||
|
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
|
||||||
|
// PtNode arrays.
|
||||||
|
int mPtNodeArrayIndexInThisArrayChain;
|
||||||
int mPosOfLastForwardLinkField;
|
int mPosOfLastForwardLinkField;
|
||||||
int mPosOfLastPtNodeArrayHead;
|
int mPosOfThisPtNodeArrayHead;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
static const size_t MAX_READING_STATE_STACK_SIZE;
|
static const size_t MAX_READING_STATE_STACK_SIZE;
|
||||||
|
|
||||||
// TODO: Introduce error code to track what caused the error.
|
// TODO: Introduce error code to track what caused the error.
|
||||||
bool mIsError;
|
bool mIsError;
|
||||||
ReadingState mReadingState;
|
PtNodeReadingState mReadingState;
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
std::vector<ReadingState> mReadingStateStack;
|
std::vector<PtNodeReadingState> mReadingStateStack;
|
||||||
|
|
||||||
void nextPtNodeArray();
|
void nextPtNodeArray();
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,12 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int nextPos = dicNode->getChildrenPos();
|
int nextPos = dicNode->getChildrenPtNodeArrayPos();
|
||||||
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
||||||
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
|
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
|
||||||
nextPos, mDictBufferSize);
|
nextPos, mDictBufferSize);
|
||||||
|
@ -52,14 +52,14 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
|
|
||||||
// This retrieves code points and the probability of the word by its terminal position.
|
// This retrieves code points and the probability of the word by its terminal position.
|
||||||
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
|
// Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
|
||||||
// it is possible to check for this with advantageous complexity. For each node, we search
|
// it is possible to check for this with advantageous complexity. For each PtNode array, we search
|
||||||
// for PtNodes with children and compare the children position with the position we look for.
|
// for PtNodes with children and compare the children position with the position we look for.
|
||||||
// When we shoot the position we look for, it means the word we look for is in the children
|
// When we shoot the position we look for, it means the word we look for is in the children
|
||||||
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
|
// of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
|
||||||
// PtNode array with the last PtNode's children position still less than what we are searching for,
|
// PtNode array with the last PtNode's children position still less than what we are searching for,
|
||||||
// we must descend the last PtNode's children (for example, if the word we are searching for starts
|
// we must descend the last PtNode's children (for example, if the word we are searching for starts
|
||||||
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
||||||
// than the position we look for, and we have to descend the z node).
|
// than the position we look for, and we have to descend the z PtNode).
|
||||||
/* Parameters :
|
/* Parameters :
|
||||||
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
||||||
* what is stored as the "bigram position" in each bigram)
|
* what is stored as the "bigram position" in each bigram)
|
||||||
|
@ -74,9 +74,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
int pos = getRootPosition();
|
int pos = getRootPosition();
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
|
// One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
|
||||||
// only traverse nodes that are actually a part of the terminal we are searching, so each time
|
// only traverse PtNodes that are actually a part of the terminal we are searching, so each
|
||||||
// we enter this loop we are one depth level further than last time.
|
// time we enter this loop we are one depth level further than last time.
|
||||||
// The only reason we count nodes is because we want to reduce the probability of infinite
|
// The only reason we count PtNodes is because we want to reduce the probability of infinite
|
||||||
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
|
// looping in case there is a bug. Since we know there is an upper bound to the depth we are
|
||||||
// supposed to traverse, it does not hurt to count iterations.
|
// supposed to traverse, it does not hurt to count iterations.
|
||||||
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
|
for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
|
||||||
|
@ -140,8 +140,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
found = true;
|
found = true;
|
||||||
} else if (1 >= ptNodeCount) {
|
} else if (1 >= ptNodeCount) {
|
||||||
// However if we are on the LAST PtNode of this array, and we have NOT shot the
|
// However if we are on the LAST PtNode of this array, and we have NOT shot the
|
||||||
// position we should descend THIS node. So we trick the lastCandidatePtNodePos
|
// position we should descend THIS PtNode. So we trick the
|
||||||
// so that we will descend this PtNode, not the previous one.
|
// lastCandidatePtNodePos so that we will descend this PtNode, not the previous
|
||||||
|
// one.
|
||||||
lastCandidatePtNodePos = startPos;
|
lastCandidatePtNodePos = startPos;
|
||||||
found = true;
|
found = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -149,7 +150,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
found = false;
|
found = false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Even if we don't have children here, we could still be on the last PtNode of /
|
// Even if we don't have children here, we could still be on the last PtNode of
|
||||||
// this array. If this is the case, we should descend the last PtNode that had
|
// this array. If this is the case, we should descend the last PtNode that had
|
||||||
// children, and their position is already in lastCandidatePtNodePos.
|
// children, and their position is already in lastCandidatePtNodePos.
|
||||||
found = (1 >= ptNodeCount);
|
found = (1 >= ptNodeCount);
|
||||||
|
@ -230,9 +231,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function gets the position of the terminal node of the exact matching word in the
|
// This function gets the position of the terminal PtNode of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
||||||
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
int pos = getRootPosition();
|
int pos = getRootPosition();
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
|
|
@ -47,14 +47,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const int length, const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
|
@ -81,7 +81,7 @@ class TypingTraversal : public Traversal {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int point0Index = dicNode->getInputIndex(0);
|
const int point0Index = dicNode->getInputIndex(0);
|
||||||
return dicNode->isTerminalWordNode()
|
return dicNode->isTerminalDicNode()
|
||||||
&& traverseSession->getProximityInfoState(0)->
|
&& traverseSession->getProximityInfoState(0)->
|
||||||
hasSpaceProximity(point0Index);
|
hasSpaceProximity(point0Index);
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ class TypingTraversal : public Traversal {
|
||||||
if (dicNode->isCompletion(inputSize)) {
|
if (dicNode->isCompletion(inputSize)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!dicNode->isTerminalWordNode()) {
|
if (!dicNode->isTerminalDicNode()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int16_t pointIndex = dicNode->getInputIndex(0);
|
const int16_t pointIndex = dicNode->getInputIndex(0);
|
||||||
|
|
Loading…
Reference in New Issue