Suggest words with transposed chars
Bug: 3193883 Change-Id: I884b669258bfc522bc04e14f22a7646164a4cac5main
parent
3f7eac0d2c
commit
a3d78f606e
|
@ -24,13 +24,17 @@
|
||||||
#define LOG_TAG "LatinIME: "
|
#define LOG_TAG "LatinIME: "
|
||||||
#endif
|
#endif
|
||||||
#define DEBUG_DICT true
|
#define DEBUG_DICT true
|
||||||
#define DEBUG_SHOW_FOUND_WORD false
|
#define DEBUG_DICT_FULL true
|
||||||
#define DEBUG_NODE true
|
#define DEBUG_SHOW_FOUND_WORD DEBUG_DICT_FULL
|
||||||
|
#define DEBUG_NODE DEBUG_DICT_FULL
|
||||||
|
#define DEBUG_TRACE DEBUG_DICT_FULL
|
||||||
#else // FLAG_DBG
|
#else // FLAG_DBG
|
||||||
#define LOGI
|
#define LOGI
|
||||||
#define DEBUG_DICT false
|
#define DEBUG_DICT false
|
||||||
|
#define DEBUG_DICT_FULL false
|
||||||
#define DEBUG_SHOW_FOUND_WORD false
|
#define DEBUG_SHOW_FOUND_WORD false
|
||||||
#define DEBUG_NODE false
|
#define DEBUG_NODE false
|
||||||
|
#define DEBUG_TRACE false
|
||||||
#endif // FLAG_DBG
|
#endif // FLAG_DBG
|
||||||
|
|
||||||
#ifndef U_SHORT_MAX
|
#ifndef U_SHORT_MAX
|
||||||
|
@ -58,6 +62,12 @@
|
||||||
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
|
#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
|
||||||
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
|
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
|
||||||
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
||||||
|
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
|
||||||
|
|
||||||
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
||||||
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
||||||
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
// This is only used for the size of array. Not to be used in c functions.
|
// This is only used for the size of array. Not to be used in c functions.
|
||||||
|
|
|
@ -36,7 +36,7 @@ UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterM
|
||||||
MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
|
||||||
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
|
TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
|
||||||
ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0) {
|
ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0) {
|
||||||
LOGI("UnigramDictionary - constructor");
|
if (DEBUG_DICT) LOGI("UnigramDictionary - constructor");
|
||||||
}
|
}
|
||||||
|
|
||||||
UnigramDictionary::~UnigramDictionary() {}
|
UnigramDictionary::~UnigramDictionary() {}
|
||||||
|
@ -45,26 +45,36 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
|
||||||
int *frequencies, int *nextLetters, int nextLettersSize)
|
int *frequencies, int *nextLetters, int nextLettersSize)
|
||||||
{
|
{
|
||||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
initSuggestions(codes, codesSize, outWords, frequencies);
|
||||||
getSuggestionCandidates(codesSize, -1, -1, nextLetters, nextLettersSize);
|
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
|
getSuggestionCandidates(codesSize, -1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
|
||||||
|
|
||||||
// Suggestion with missing character
|
// Suggestion with missing character
|
||||||
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
|
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
|
||||||
for (int i = 0; i < codesSize; ++i) {
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
||||||
getSuggestionCandidates(codesSize, i, -1, NULL, 0);
|
getSuggestionCandidates(codesSize, i, -1, -1, NULL, 0, MAX_DEPTH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suggestion with excessive character
|
// Suggestion with excessive character
|
||||||
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER) {
|
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
|
||||||
for (int i = 0; i < codesSize; ++i) {
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
if (existsAdjacentProximityChars(i, codesSize)) {
|
if (existsAdjacentProximityChars(i, codesSize)) {
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
||||||
getSuggestionCandidates(codesSize, -1, i, NULL, 0);
|
getSuggestionCandidates(codesSize, -1, i, -1, NULL, 0, MAX_DEPTH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Suggestion with transposed characters
|
||||||
|
// Only suggest words that length is mInputLength
|
||||||
|
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
|
||||||
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
|
if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
|
||||||
|
getSuggestionCandidates(codesSize, -1, -1, i, NULL, 0, mInputLength - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Suggestions with missing space
|
// Suggestions with missing space
|
||||||
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
|
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
|
||||||
for (int i = 1; i < codesSize; ++i) {
|
for (int i = 1; i < codesSize; ++i) {
|
||||||
|
@ -187,10 +197,13 @@ static const char QUOTE = '\'';
|
||||||
static const char SPACE = ' ';
|
static const char SPACE = ' ';
|
||||||
|
|
||||||
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos,
|
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos,
|
||||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
if (DEBUG_DICT) LOGI("getSuggestionCandidates");
|
const int nextLettersSize, const int maxDepth) {
|
||||||
|
if (DEBUG_DICT) LOGI("getSuggestionCandidates %d", maxDepth);
|
||||||
|
if (DEBUG_DICT) assert(transposedPos + 1 < inputLength);
|
||||||
|
if (DEBUG_DICT) assert(excessivePos < inputLength);
|
||||||
|
if (DEBUG_DICT) assert(missingPos < inputLength);
|
||||||
int rootPosition = ROOT_POS;
|
int rootPosition = ROOT_POS;
|
||||||
const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
|
||||||
// Get the number of child of root, then increment the position
|
// Get the number of child of root, then increment the position
|
||||||
int childCount = Dictionary::getCount(DICT, &rootPosition);
|
int childCount = Dictionary::getCount(DICT, &rootPosition);
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
|
@ -212,12 +225,12 @@ void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int
|
||||||
int diffs = mStackDiffs[depth];
|
int diffs = mStackDiffs[depth];
|
||||||
int siblingPos = mStackSiblingPos[depth];
|
int siblingPos = mStackSiblingPos[depth];
|
||||||
int firstChildPos;
|
int firstChildPos;
|
||||||
// depth will never be greater than MAX_DEPTH because in that case,
|
// depth will never be greater than maxDepth because in that case,
|
||||||
// needsToTraverseChildrenNodes should be false
|
// needsToTraverseChildrenNodes should be false
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth,
|
||||||
MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
|
maxDepth, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos,
|
||||||
nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes,
|
transposedPos, nextLetters, nextLettersSize, &childCount, &firstChildPos,
|
||||||
&snr, &inputIndex, &diffs, &siblingPos);
|
&traverseAllNodes, &snr, &inputIndex, &diffs, &siblingPos);
|
||||||
// Update next sibling pos
|
// Update next sibling pos
|
||||||
mStackSiblingPos[depth] = siblingPos;
|
mStackSiblingPos[depth] = siblingPos;
|
||||||
if (needsToTraverseChildrenNodes) {
|
if (needsToTraverseChildrenNodes) {
|
||||||
|
@ -252,7 +265,7 @@ bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int mi
|
||||||
}
|
}
|
||||||
|
|
||||||
const int secondFreq = getBestWordFreq(missingSpacePos, inputLength - missingSpacePos, mWord);
|
const int secondFreq = getBestWordFreq(missingSpacePos, inputLength - missingSpacePos, mWord);
|
||||||
if (DEBUG_DICT) LOGI("Second freq: %d", secondFreq);
|
if (DEBUG_DICT) LOGI("Second freq: %d", secondFreq);
|
||||||
if (secondFreq <= 0) return false;
|
if (secondFreq <= 0) return false;
|
||||||
|
|
||||||
word[missingSpacePos] = SPACE;
|
word[missingSpacePos] = SPACE;
|
||||||
|
@ -262,24 +275,27 @@ bool UnigramDictionary::getMissingSpaceWords(const int inputLength, const int mi
|
||||||
|
|
||||||
int pairFreq = ((firstFreq + secondFreq) / 2);
|
int pairFreq = ((firstFreq + secondFreq) / 2);
|
||||||
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
for (int i = 0; i < inputLength; ++i) pairFreq *= TYPED_LETTER_MULTIPLIER;
|
||||||
|
pairFreq = pairFreq * WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE / 100;
|
||||||
addWord(word, newWordLength, pairFreq);
|
addWord(word, newWordLength, pairFreq);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Keep this for comparing spec to new getWords
|
// Keep this for comparing spec to new getWords
|
||||||
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||||
const int excessivePos, int *nextLetters, const int nextLettersSize) {
|
const int excessivePos, const int transposedPos,int *nextLetters,
|
||||||
|
const int nextLettersSize) {
|
||||||
int initialPosition = initialPos;
|
int initialPosition = initialPos;
|
||||||
const int count = Dictionary::getCount(DICT, &initialPosition);
|
const int count = Dictionary::getCount(DICT, &initialPosition);
|
||||||
getWordsRec(count, initialPosition, 0,
|
getWordsRec(count, initialPosition, 0,
|
||||||
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
|
min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
|
||||||
mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize);
|
mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,
|
||||||
|
nextLettersSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
|
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int nextLettersSize) {
|
int *nextLetters, const int nextLettersSize) {
|
||||||
int siblingPos = pos;
|
int siblingPos = pos;
|
||||||
for (int i = 0; i < childrenCount; ++i) {
|
for (int i = 0; i < childrenCount; ++i) {
|
||||||
int newCount;
|
int newCount;
|
||||||
|
@ -291,34 +307,50 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
int newDiffs;
|
int newDiffs;
|
||||||
int newSiblingPos;
|
int newSiblingPos;
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
|
||||||
traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, nextLetters,
|
traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, transposedPos,
|
||||||
nextLettersSize,
|
nextLetters, nextLettersSize,
|
||||||
&newCount, &newChildPosition, &newTraverseAllNodes, &newSnr,
|
&newCount, &newChildPosition, &newTraverseAllNodes, &newSnr,
|
||||||
&newInputIndex, &newDiffs, &newSiblingPos);
|
&newInputIndex, &newDiffs, &newSiblingPos);
|
||||||
siblingPos = newSiblingPos;
|
siblingPos = newSiblingPos;
|
||||||
|
|
||||||
if (needsToTraverseChildrenNodes) {
|
if (needsToTraverseChildrenNodes) {
|
||||||
getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
|
getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
|
||||||
newSnr, newInputIndex, newDiffs, skipPos, excessivePos, nextLetters,
|
newSnr, newInputIndex, newDiffs, skipPos, excessivePos, transposedPos,
|
||||||
nextLettersSize);
|
nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
||||||
unsigned short *word, const int inputLength, const int depth, const int snr,
|
unsigned short *word, const int inputLength, const int depth, const int snr,
|
||||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) {
|
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, freq * snr);
|
const int transposedPos, const int freq) {
|
||||||
|
int finalFreq = freq * snr;
|
||||||
|
// TODO: Demote by edit distance
|
||||||
|
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
|
||||||
|
if (excessivePos >= 0) finalFreq = finalFreq
|
||||||
|
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
|
||||||
|
if (transposedPos >= 0) finalFreq = finalFreq
|
||||||
|
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
|
||||||
|
|
||||||
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
if (depth >= inputLength && skipPos < 0) {
|
if (depth >= inputLength && skipPos < 0) {
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
unsigned short *word, const int depth, const int snr, const int skipPos, const int freq,
|
unsigned short *word, const int depth, const int snr, const int skipPos,
|
||||||
const int addedWeight) {
|
const int excessivePos, const int transposedPos, const int freq, const int addedWeight) {
|
||||||
if (!sameAsTyped(word, depth + 1)) {
|
if (!sameAsTyped(word, depth + 1)) {
|
||||||
int finalFreq = freq * snr * addedWeight;
|
int finalFreq = freq * snr * addedWeight;
|
||||||
|
// TODO: Demote by edit distance
|
||||||
|
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
|
||||||
|
if (excessivePos >= 0) finalFreq = finalFreq
|
||||||
|
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
|
||||||
|
if (transposedPos >= 0) finalFreq = finalFreq
|
||||||
|
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
|
||||||
|
|
||||||
// Proximity collection will promote a word of the same length as
|
// Proximity collection will promote a word of the same length as
|
||||||
// what user typed.
|
// what user typed.
|
||||||
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
|
@ -357,16 +389,18 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
||||||
const unsigned short c, const int skipPos) {
|
const unsigned short c, const int skipPos, const int excessivePos,
|
||||||
|
const int transposedPos) {
|
||||||
const unsigned short lowerC = toLowerCase(c);
|
const unsigned short lowerC = toLowerCase(c);
|
||||||
int j = 0;
|
int j = 0;
|
||||||
while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
|
while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
|
||||||
const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
|
const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
|
||||||
// If skipPos is defined, not to search proximity collections.
|
// If skipPos is defined, not to search proximity collections.
|
||||||
// First char is what user typed.
|
// First char is what user typed.
|
||||||
if (matched) {
|
if (matched) {
|
||||||
return j;
|
return j;
|
||||||
} else if (skipPos >= 0) {
|
} else if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
|
||||||
|
// Not to check proximity characters
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
++j;
|
++j;
|
||||||
|
@ -376,10 +410,17 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
||||||
|
|
||||||
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int nextLettersSize, int *newCount, int *newChildPosition, bool *newTraverseAllNodes,
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||||
int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) {
|
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||||
if (DEBUG_DICT) assert(skipPos < 0 || excessivePos < 0);
|
int *nextSiblingPosition) {
|
||||||
|
if (DEBUG_DICT) {
|
||||||
|
int inputCount = 0;
|
||||||
|
if (skipPos >= 0) ++inputCount;
|
||||||
|
if (excessivePos >= 0) ++inputCount;
|
||||||
|
if (transposedPos >= 0) ++inputCount;
|
||||||
|
assert(inputCount <= 1);
|
||||||
|
}
|
||||||
unsigned short c;
|
unsigned short c;
|
||||||
int childPosition;
|
int childPosition;
|
||||||
bool terminal;
|
bool terminal;
|
||||||
|
@ -397,7 +438,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (traverseAllNodes && terminal) {
|
if (traverseAllNodes && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
||||||
snr, nextLetters, nextLettersSize, skipPos, freq);
|
snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
*newTraverseAllNodes = traverseAllNodes;
|
*newTraverseAllNodes = traverseAllNodes;
|
||||||
|
@ -406,7 +447,14 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
*newInputIndex = inputIndex;
|
*newInputIndex = inputIndex;
|
||||||
} else {
|
} else {
|
||||||
int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
|
int *currentChars = mInputCodes + (inputIndex * MAX_PROXIMITY_CHARS);
|
||||||
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos);
|
|
||||||
|
if (transposedPos >= 0) {
|
||||||
|
if (inputIndex == transposedPos) currentChars += MAX_PROXIMITY_CHARS;
|
||||||
|
if (inputIndex == (transposedPos + 1)) currentChars -= MAX_PROXIMITY_CHARS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
|
||||||
|
transposedPos);
|
||||||
if (matchedProximityCharId < 0) return false;
|
if (matchedProximityCharId < 0) return false;
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
// If inputIndex is greater than mInputLength, that means there is no
|
// If inputIndex is greater than mInputLength, that means there is no
|
||||||
|
@ -415,13 +463,13 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
|
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
|
||||||
if (isSameAsUserTypedLength && terminal) {
|
if (isSameAsUserTypedLength && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
|
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
|
||||||
skipPos, freq, addedWeight);
|
skipPos, excessivePos, transposedPos, freq, addedWeight);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
// Start traversing all nodes after the index exceeds the user typed length
|
// Start traversing all nodes after the index exceeds the user typed length
|
||||||
*newTraverseAllNodes = isSameAsUserTypedLength;
|
*newTraverseAllNodes = isSameAsUserTypedLength;
|
||||||
*newSnr = snr * addedWeight;
|
*newSnr = snr * addedWeight;
|
||||||
*newDiffs = diffs + (matchedProximityCharId > 0);
|
*newDiffs = diffs + ((matchedProximityCharId > 0) ? 1 : 0);
|
||||||
*newInputIndex = inputIndex + 1;
|
*newInputIndex = inputIndex + 1;
|
||||||
}
|
}
|
||||||
// Optimization: Prune out words that are too long compared to how much was typed.
|
// Optimization: Prune out words that are too long compared to how much was typed.
|
||||||
|
|
|
@ -32,7 +32,8 @@ public:
|
||||||
private:
|
private:
|
||||||
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
||||||
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos,
|
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos,
|
||||||
int *nextLetters, const int nextLettersSize);
|
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||||
|
const int maxDepth);
|
||||||
void getVersionNumber();
|
void getVersionNumber();
|
||||||
bool checkIfDictVersionIsLatest();
|
bool checkIfDictVersionIsLatest();
|
||||||
int getAddress(int *pos);
|
int getAddress(int *pos);
|
||||||
|
@ -43,25 +44,30 @@ private:
|
||||||
unsigned short toLowerCase(unsigned short c);
|
unsigned short toLowerCase(unsigned short c);
|
||||||
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||||
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||||
const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize);
|
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
|
const int nextLettersSize);
|
||||||
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
|
bool getMissingSpaceWords(const int inputLength, const int missingSpacePos);
|
||||||
// Keep getWordsOld for comparing performance between getWords and getWordsOld
|
// Keep getWordsOld for comparing performance between getWords and getWordsOld
|
||||||
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||||
const int excessivePos, int *nextLetters, const int nextLettersSize);
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
||||||
const int nextLettersSize, const int skipPos, const int freq);
|
const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
|
const int transposedPos, const int freq);
|
||||||
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
||||||
const int snr, const int skipPos, const int freq, const int addedWeight);
|
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
|
const int freq, const int addedWeight);
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
|
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
|
||||||
|
const int excessivePos, const int transposedPos);
|
||||||
// Process a node by considering proximity, missing and excessive character
|
// Process a node by considering proximity, missing and excessive character
|
||||||
bool processCurrentNode(const int pos, const int depth,
|
bool processCurrentNode(const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||||
const int diffs, const int skipPos, const int excessivePos, int *nextLetters,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int nextLettersSize, int *newCount, int *newChildPosition,
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||||
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||||
int *nextSiblingPosition);
|
int *nextSiblingPosition);
|
||||||
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
|
int getBestWordFreq(const int startInputIndex, const int inputLength, unsigned short *word);
|
||||||
|
|
Loading…
Reference in New Issue