Move code related to ranking algorithm to correction_state.cpp

Change-Id: I52b34de45969fef82e46d9c10079c2d45e0b94eb
main
satok 2011-08-03 02:19:44 +09:00
parent e486290013
commit 0f6c8e8aeb
5 changed files with 93 additions and 79 deletions

View File

@ -58,10 +58,32 @@ int CorrectionState::getFreqForSplitTwoWords(const int firstFreq, const int seco
return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); return CorrectionState::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this);
} }
int CorrectionState::getFinalFreq(const int inputIndex, const int depth, const int matchWeight, int CorrectionState::getFinalFreq(const int inputIndex, const int outputIndex, const int freq) {
const int freq, const bool sameLength) { const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
return CorrectionState::RankingAlgorithm::calculateFinalFreq(inputIndex, depth, matchWeight, : (mInputLength == inputIndex + 1);
freq, sameLength, this); const int matchCount = mMatchedCharCount;
return CorrectionState::RankingAlgorithm::calculateFinalFreq(
inputIndex, outputIndex, matchCount, freq, sameLength, this);
}
void CorrectionState::initDepth() {
mMatchedCharCount = 0;
}
// Records one more matched character by bumping the matched-character counter.
// processCurrentNode() calls this when the proximity check reports
// SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR for the current character.
void CorrectionState::charMatched() {
    mMatchedCharCount += 1;
}
void CorrectionState::goUpTree(const int matchCount) {
mMatchedCharCount = matchCount;
}
void CorrectionState::slideTree(const int matchCount) {
mMatchedCharCount = matchCount;
}
void CorrectionState::goDownTree(int *matchedCount) {
*matchedCount = mMatchedCharCount;
} }
CorrectionState::~CorrectionState() { CorrectionState::~CorrectionState() {
@ -117,7 +139,8 @@ inline static void multiplyRate(const int rate, int *freq) {
// RankingAlgorithm // // RankingAlgorithm //
////////////////////// //////////////////////
int CorrectionState::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int depth, int CorrectionState::RankingAlgorithm::calculateFinalFreq(
const int inputIndex, const int outputIndex,
const int matchCount, const int freq, const bool sameLength, const int matchCount, const int freq, const bool sameLength,
const CorrectionState* correctionState) { const CorrectionState* correctionState) {
const int skipPos = correctionState->getSkipPos(); const int skipPos = correctionState->getSkipPos();
@ -156,10 +179,10 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(const int inputIndex,
} }
} }
int lengthFreq = typedLetterMultiplier; int lengthFreq = typedLetterMultiplier;
multiplyIntCapped(powerIntCapped(typedLetterMultiplier, depth), &lengthFreq); multiplyIntCapped(powerIntCapped(typedLetterMultiplier, outputIndex), &lengthFreq);
if (lengthFreq == matchWeight) { if ((outputIndex + 1) == matchCount) {
// Full exact match // Full exact match
if (depth > 1) { if (outputIndex > 1) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Found full matched word."); LOGI("Found full matched word.");
} }
@ -168,7 +191,8 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(const int inputIndex,
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) { if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
} }
} else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0 && depth > 0) { } else if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0
&& outputIndex > 0) {
// A word with proximity corrections // A word with proximity corrections
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("Found one proximity correction."); LOGI("Found one proximity correction.");
@ -177,7 +201,7 @@ int CorrectionState::RankingAlgorithm::calculateFinalFreq(const int inputIndex,
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
} }
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("calc: %d, %d", depth, sameLength); LOGI("calc: %d, %d", outputIndex, sameLength);
} }
if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq); if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq);
return finalFreq; return finalFreq;

View File

@ -32,7 +32,12 @@ public:
void initCorrectionState(const ProximityInfo *pi, const int inputLength); void initCorrectionState(const ProximityInfo *pi, const int inputLength);
void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos, void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
const int spaceProximityPos, const int missingSpacePos); const int spaceProximityPos, const int missingSpacePos);
void initDepth();
void checkState(); void checkState();
void goUpTree(const int matchCount);
void slideTree(const int matchCount);
void goDownTree(int *matchedCount);
void charMatched();
virtual ~CorrectionState(); virtual ~CorrectionState();
int getSkipPos() const { int getSkipPos() const {
return mSkipPos; return mSkipPos;
@ -50,13 +55,13 @@ public:
return mMissingSpacePos; return mMissingSpacePos;
} }
int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq);
int getFinalFreq(const int inputIndex, const int depth, const int matchWeight, const int freq, int getFinalFreq(const int inputIndex, const int outputIndex, const int freq);
const bool sameLength);
private: private:
const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER;
const int FULL_WORD_MULTIPLIER; const int FULL_WORD_MULTIPLIER;
const ProximityInfo *mProximityInfo; const ProximityInfo *mProximityInfo;
int mInputLength; int mInputLength;
int mSkipPos; int mSkipPos;
@ -65,6 +70,8 @@ private:
int mSpaceProximityPos; int mSpaceProximityPos;
int mMissingSpacePos; int mMissingSpacePos;
int mMatchedCharCount;
class RankingAlgorithm { class RankingAlgorithm {
public: public:
static int calculateFinalFreq(const int inputIndex, const int depth, static int calculateFinalFreq(const int inputIndex, const int depth,

View File

@ -176,9 +176,6 @@ static void prof_out(void) {
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3 #define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3 #define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
// The size of next letters frequency array. Zero will disable the feature.
#define NEXT_LETTERS_SIZE 0
#define min(a,b) ((a)<(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b))
#endif // LATINIME_DEFINES_H #endif // LATINIME_DEFINES_H

View File

@ -167,12 +167,6 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
LOGI("%s %i", s, mFrequencies[j]); LOGI("%s %i", s, mFrequencies[j]);
#endif #endif
} }
LOGI("Next letters: ");
for (int k = 0; k < NEXT_LETTERS_SIZE; k++) {
if (mNextLettersFrequency[k] > 0) {
LOGI("%c = %d,", k, mNextLettersFrequency[k]);
}
}
} }
PROF_END(20); PROF_END(20);
PROF_CLOSE; PROF_CLOSE;
@ -194,7 +188,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
PROF_END(0); PROF_END(0);
PROF_START(1); PROF_START(1);
getSuggestionCandidates(-1, -1, -1, mNextLettersFrequency, NEXT_LETTERS_SIZE, MAX_DEPTH); getSuggestionCandidates(-1, -1, -1, MAX_DEPTH);
PROF_END(1); PROF_END(1);
PROF_START(2); PROF_START(2);
@ -204,7 +198,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("--- Suggest missing characters %d", i); LOGI("--- Suggest missing characters %d", i);
} }
getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH); getSuggestionCandidates(i, -1, -1, MAX_DEPTH);
} }
} }
PROF_END(2); PROF_END(2);
@ -217,7 +211,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("--- Suggest excessive characters %d", i); LOGI("--- Suggest excessive characters %d", i);
} }
getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH); getSuggestionCandidates(-1, i, -1, MAX_DEPTH);
} }
} }
PROF_END(3); PROF_END(3);
@ -230,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("--- Suggest transposed characters %d", i); LOGI("--- Suggest transposed characters %d", i);
} }
getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1); getSuggestionCandidates(-1, -1, i, mInputLength - 1);
} }
} }
PROF_END(4); PROF_END(4);
@ -348,8 +342,7 @@ static const char QUOTE = '\'';
static const char SPACE = ' '; static const char SPACE = ' ';
void UnigramDictionary::getSuggestionCandidates(const int skipPos, void UnigramDictionary::getSuggestionCandidates(const int skipPos,
const int excessivePos, const int transposedPos, int *nextLetters, const int excessivePos, const int transposedPos, const int maxDepth) {
const int nextLettersSize, const int maxDepth) {
if (DEBUG_DICT) { if (DEBUG_DICT) {
LOGI("getSuggestionCandidates %d", maxDepth); LOGI("getSuggestionCandidates %d", maxDepth);
assert(transposedPos + 1 < mInputLength); assert(transposedPos + 1 < mInputLength);
@ -365,29 +358,31 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
mStackChildCount[0] = childCount; mStackChildCount[0] = childCount;
mStackTraverseAll[0] = (mInputLength <= 0); mStackTraverseAll[0] = (mInputLength <= 0);
mStackMatchCount[0] = 0;
mStackInputIndex[0] = 0; mStackInputIndex[0] = 0;
mStackDiffs[0] = 0; mStackDiffs[0] = 0;
mStackSiblingPos[0] = rootPosition; mStackSiblingPos[0] = rootPosition;
mStackOutputIndex[0] = 0; mStackOutputIndex[0] = 0;
mStackMatchedCount[0] = 0;
mCorrectionState->initDepth();
// Depth first search // Depth first search
while (depth >= 0) { while (depth >= 0) {
if (mStackChildCount[depth] > 0) { if (mStackChildCount[depth] > 0) {
--mStackChildCount[depth]; --mStackChildCount[depth];
bool traverseAllNodes = mStackTraverseAll[depth]; bool traverseAllNodes = mStackTraverseAll[depth];
int matchCount = mStackMatchCount[depth];
int inputIndex = mStackInputIndex[depth]; int inputIndex = mStackInputIndex[depth];
int diffs = mStackDiffs[depth]; int diffs = mStackDiffs[depth];
int siblingPos = mStackSiblingPos[depth]; int siblingPos = mStackSiblingPos[depth];
int outputIndex = mStackOutputIndex[depth]; int outputIndex = mStackOutputIndex[depth];
int firstChildPos; int firstChildPos;
mCorrectionState->slideTree(mStackMatchedCount[depth]);
// depth will never be greater than maxDepth because in that case, // depth will never be greater than maxDepth because in that case,
// needsToTraverseChildrenNodes should be false // needsToTraverseChildrenNodes should be false
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, outputIndex,
maxDepth, traverseAllNodes, matchCount, inputIndex, diffs, maxDepth, traverseAllNodes, inputIndex, diffs,
nextLetters, nextLettersSize, mCorrectionState, &childCount, mCorrectionState, &childCount,
&firstChildPos, &traverseAllNodes, &matchCount, &inputIndex, &diffs, &firstChildPos, &traverseAllNodes, &inputIndex, &diffs,
&siblingPos, &outputIndex); &siblingPos, &outputIndex);
// Update next sibling pos // Update next sibling pos
mStackSiblingPos[depth] = siblingPos; mStackSiblingPos[depth] = siblingPos;
@ -396,15 +391,21 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
++depth; ++depth;
mStackChildCount[depth] = childCount; mStackChildCount[depth] = childCount;
mStackTraverseAll[depth] = traverseAllNodes; mStackTraverseAll[depth] = traverseAllNodes;
mStackMatchCount[depth] = matchCount;
mStackInputIndex[depth] = inputIndex; mStackInputIndex[depth] = inputIndex;
mStackDiffs[depth] = diffs; mStackDiffs[depth] = diffs;
mStackSiblingPos[depth] = firstChildPos; mStackSiblingPos[depth] = firstChildPos;
mStackOutputIndex[depth] = outputIndex; mStackOutputIndex[depth] = outputIndex;
int matchedCount;
mCorrectionState->goDownTree(&matchedCount);
mStackMatchedCount[depth] = matchedCount;
} else {
mCorrectionState->slideTree(mStackMatchedCount[depth]);
} }
} else { } else {
// Goes to parent sibling node // Goes to parent sibling node
--depth; --depth;
mCorrectionState->goUpTree(mStackMatchedCount[depth]);
} }
} }
} }
@ -445,24 +446,13 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
} }
inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth, inline void UnigramDictionary::onTerminal(unsigned short int* word, const int outputIndex,
const uint8_t* const root, const uint8_t flags, const int pos, const int inputIndex, const int freq, CorrectionState *correctionState) {
const int inputIndex, const int matchCount, const int freq, const bool sameLength, if (!mProximityInfo->sameAsTyped(word, outputIndex + 1) && outputIndex >= MIN_SUGGEST_DEPTH) {
int* nextLetters, const int nextLettersSize, CorrectionState *correctionState) { const int finalFreq = correctionState->getFinalFreq(inputIndex, outputIndex, freq);
const int skipPos = correctionState->getSkipPos(); if (finalFreq >= 0) {
addWord(word, outputIndex + 1, finalFreq);
const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false; }
if (isSameAsTyped) return;
if (depth >= MIN_SUGGEST_DEPTH) {
const int finalFreq = correctionState->getFinalFreq(inputIndex, depth, matchCount,
freq, sameLength);
if (!isSameAsTyped)
addWord(word, depth + 1, finalFreq);
}
if (sameLength && depth >= mInputLength && skipPos < 0) {
registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
} }
} }
@ -677,11 +667,11 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// there aren't any more nodes at this level, it merely returns the address of the first byte after // there aren't any more nodes at this level, it merely returns the address of the first byte after
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
// given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialDepth, inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int initialOutputPos,
const int maxDepth, const bool initialTraverseAllNodes, int matchCount, int inputIndex, const int maxDepth, const bool initialTraverseAllNodes, int inputIndex,
const int initialDiffs, int *nextLetters, const int nextLettersSize, const int initialDiffs,
CorrectionState *correctionState, int *newCount, int *newChildrenPosition, CorrectionState *correctionState, int *newCount, int *newChildrenPosition,
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, bool *newTraverseAllNodes, int *newInputIndex, int *newDiffs,
int *nextSiblingPosition, int *newOutputIndex) { int *nextSiblingPosition, int *newOutputIndex) {
const int skipPos = correctionState->getSkipPos(); const int skipPos = correctionState->getSkipPos();
const int excessivePos = correctionState->getExcessivePos(); const int excessivePos = correctionState->getExcessivePos();
@ -690,7 +680,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
correctionState->checkState(); correctionState->checkState();
} }
int pos = initialPos; int pos = initialPos;
int depth = initialDepth; int internalOutputPos = initialOutputPos;
int traverseAllNodes = initialTraverseAllNodes; int traverseAllNodes = initialTraverseAllNodes;
int diffs = initialDiffs; int diffs = initialDiffs;
@ -736,15 +726,16 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// This has to be done for each virtual char (this forwards the "inputIndex" which // This has to be done for each virtual char (this forwards the "inputIndex" which
// is the index in the user-inputted chars, as read by proximity chars. // is the index in the user-inputted chars, as read by proximity chars.
if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex; if (excessivePos == internalOutputPos && inputIndex < mInputLength - 1) {
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { ++inputIndex;
mWord[depth] = c; }
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, internalOutputPos)) {
mWord[internalOutputPos] = c;
if (traverseAllNodes && isTerminal) { if (traverseAllNodes && isTerminal) {
// The frequency should be here, because we come here only if this is actually // The frequency should be here, because we come here only if this is actually
// a terminal node, and we are on its last char. // a terminal node, and we are on its last char.
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchCount, onTerminal(mWord, internalOutputPos, inputIndex, freq, mCorrectionState);
freq, false, nextLetters, nextLettersSize, mCorrectionState);
} }
if (!hasChildren) { if (!hasChildren) {
// If we don't have children here, that means we finished processing all // If we don't have children here, that means we finished processing all
@ -784,18 +775,17 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos); BinaryFormat::skipChildrenPosAndAttributes(DICT_ROOT, flags, pos);
return false; return false;
} }
mWord[depth] = c; mWord[internalOutputPos] = c;
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity. // proximity chars. So, we don't need to check proximity.
if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) { if (ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
++matchCount; correctionState->charMatched();
} }
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1 const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2); || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
if (isSameAsUserTypedLength && isTerminal) { if (isSameAsUserTypedLength && isTerminal) {
const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchCount, onTerminal(mWord, internalOutputPos, inputIndex, freq, mCorrectionState);
freq, true, nextLetters, nextLettersSize, mCorrectionState);
} }
// This character matched the typed character (enough to traverse the node at least) // This character matched the typed character (enough to traverse the node at least)
// so we just evaluated it. Now we should evaluate this virtual node's children - that // so we just evaluated it. Now we should evaluate this virtual node's children - that
@ -821,7 +811,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
++inputIndex; ++inputIndex;
} }
// Optimization: Prune out words that are too long compared to how much was typed. // Optimization: Prune out words that are too long compared to how much was typed.
if (depth >= maxDepth || diffs > mMaxEditDistance) { if (internalOutputPos >= maxDepth || diffs > mMaxEditDistance) {
// We are giving up parsing this node and its children. Skip the rest of the node, // We are giving up parsing this node and its children. Skip the rest of the node,
// output the sibling position, and return that we don't want to traverse children. // output the sibling position, and return that we don't want to traverse children.
if (!isLastChar) { if (!isLastChar) {
@ -838,7 +828,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// contain NOT_A_CHARACTER. // contain NOT_A_CHARACTER.
c = nextc; c = nextc;
// Also, the next char is one "virtual node" depth more than this char. // Also, the next char is one "virtual node" depth more than this char.
++depth; ++internalOutputPos;
} while (NOT_A_CHARACTER != c); } while (NOT_A_CHARACTER != c);
// If inputIndex is greater than mInputLength, that means there are no proximity chars. // If inputIndex is greater than mInputLength, that means there are no proximity chars.
@ -850,10 +840,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const in
// All the output values that are purely computation by this function are held in local // All the output values that are purely computation by this function are held in local
// variables. Output them to the caller. // variables. Output them to the caller.
*newTraverseAllNodes = traverseAllNodes; *newTraverseAllNodes = traverseAllNodes;
*newMatchRate = matchCount;
*newDiffs = diffs; *newDiffs = diffs;
*newInputIndex = inputIndex; *newInputIndex = inputIndex;
*newOutputIndex = depth; *newOutputIndex = internalOutputPos;
// Now we finished processing this node, and we want to traverse children. If there are no // Now we finished processing this node, and we want to traverse children. If there are no
// children, we can't come here. // children, we can't come here.

View File

@ -87,8 +87,7 @@ private:
const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize,
unsigned short *outWords, int *frequencies); unsigned short *outWords, int *frequencies);
void getSuggestionCandidates(const int skipPos, const int excessivePos, void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos, int *nextLetters, const int nextLettersSize, const int transposedPos, const int maxDepth);
const int maxDepth);
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState); void getSplitTwoWordsSuggestion(const int inputLength, CorrectionState *correctionState);
void getMissingSpaceWords( void getMissingSpaceWords(
@ -96,17 +95,16 @@ private:
void getMistypedSpaceWords( void getMistypedSpaceWords(
const int inputLength, const int spaceProximityPos, CorrectionState *correctionState); const int inputLength, const int spaceProximityPos, CorrectionState *correctionState);
void onTerminal(unsigned short int* word, const int depth, void onTerminal(unsigned short int* word, const int depth,
const uint8_t* const root, const uint8_t flags, const int pos, const int inputIndex, const int freq,
const int inputIndex, const int matchWeight, const int freq, const bool sameLength, CorrectionState *correctionState);
int* nextLetters, const int nextLettersSize, CorrectionState *correctionState);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
// Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character
bool processCurrentNode(const int initialPos, const int initialDepth, bool processCurrentNode(const int initialPos, const int initialDepth,
const int maxDepth, const bool initialTraverseAllNodes, int matchWeight, int inputIndex, const int maxDepth, const bool initialTraverseAllNodes, int inputIndex,
const int initialDiffs, int *nextLetters, const int nextLettersSize, const int initialDiffs,
CorrectionState *correctionState, int *newCount, int *newChildPosition, CorrectionState *correctionState, int *newCount, int *newChildPosition,
bool *newTraverseAllNodes, int *newMatchRate, int *newInputIndex, int *newDiffs, bool *newTraverseAllNodes, int *newInputIndex, int *newDiffs,
int *nextSiblingPosition, int *nextOutputIndex); int *nextSiblingPosition, int *nextOutputIndex);
int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
unsigned short *word); unsigned short *word);
@ -142,14 +140,13 @@ private:
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mMaxEditDistance; int mMaxEditDistance;
int mStackMatchedCount[MAX_WORD_LENGTH_INTERNAL];
int mStackChildCount[MAX_WORD_LENGTH_INTERNAL]; int mStackChildCount[MAX_WORD_LENGTH_INTERNAL];
bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL]; bool mStackTraverseAll[MAX_WORD_LENGTH_INTERNAL];
int mStackMatchCount[MAX_WORD_LENGTH_INTERNAL];
int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL]; int mStackInputIndex[MAX_WORD_LENGTH_INTERNAL];
int mStackDiffs[MAX_WORD_LENGTH_INTERNAL]; int mStackDiffs[MAX_WORD_LENGTH_INTERNAL];
int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL]; int mStackSiblingPos[MAX_WORD_LENGTH_INTERNAL];
int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL]; int mStackOutputIndex[MAX_WORD_LENGTH_INTERNAL];
int mNextLettersFrequency[NEXT_LETTERS_SIZE];
}; };
} // namespace latinime } // namespace latinime