Prune out suggestions that have a very large edit distance.

If the number of letters matched through a neighboring (proximity) key, rather
than the key that was actually typed, grows too large, prune out the subtree.
Otherwise you get vastly unrelated suggestions.

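Fix a bug introduced with the missing_chars check-in: the skipped-character retry loop no longer overwrites the suggestion count that was already found.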
main
Amith Yamasani 2009-07-15 18:30:47 -07:00 committed by Jean-Baptiste Queru
parent c3df2d6fd2
commit 322dc3d3f3
3 changed files with 23 additions and 11 deletions

View File

@ -60,8 +60,9 @@ int Dictionary::getSuggestions(int *codes, int codesSize, unsigned short *outWor
mMaxWords = maxWords; mMaxWords = maxWords;
mWords = 0; mWords = 0;
mSkipPos = skipPos; mSkipPos = skipPos;
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
getWordsRec(0, 0, mInputLength * 3, false, 1, 0); getWordsRec(0, 0, mInputLength * 3, false, 1, 0, 0);
if (DEBUG_DICT) LOGI("Returning %d words", mWords); if (DEBUG_DICT) LOGI("Returning %d words", mWords);
return mWords; return mWords;
@ -108,7 +109,11 @@ bool
Dictionary::addWord(unsigned short *word, int length, int frequency) Dictionary::addWord(unsigned short *word, int length, int frequency)
{ {
word[length] = 0; word[length] = 0;
if (DEBUG_DICT) LOGI("Found word = %s, freq = %d : \n", word, frequency); if (DEBUG_DICT) {
char s[length + 1];
for (int i = 0; i <= length; i++) s[i] = word[i];
LOGI("Found word = %s, freq = %d : \n", s, frequency);
}
// Find the right insertion point // Find the right insertion point
int insertAt = 0; int insertAt = 0;
@ -176,12 +181,16 @@ Dictionary::sameAsTyped(unsigned short *word, int length)
static char QUOTE = '\''; static char QUOTE = '\'';
void void
Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex) Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int inputIndex,
int diffs)
{ {
// Optimization: Prune out words that are too long compared to how much was typed. // Optimization: Prune out words that are too long compared to how much was typed.
if (depth > maxDepth) { if (depth > maxDepth) {
return; return;
} }
if (diffs > mMaxEditDistance) {
return;
}
int count = getCount(&pos); int count = getCount(&pos);
int *currentChars = NULL; int *currentChars = NULL;
if (mInputLength <= inputIndex) { if (mInputLength <= inputIndex) {
@ -205,19 +214,19 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
} }
if (childrenAddress != 0) { if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, maxDepth, getWordsRec(childrenAddress, depth + 1, maxDepth,
completion, snr, inputIndex); completion, snr, inputIndex, diffs);
} }
} else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) { } else if (c == QUOTE && currentChars[0] != QUOTE || mSkipPos == depth) {
// Skip the ' or other letter and continue deeper // Skip the ' or other letter and continue deeper
mWord[depth] = c; mWord[depth] = c;
if (childrenAddress != 0) { if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex); getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs);
} }
} else { } else {
int j = 0; int j = 0;
while (currentChars[j] > 0) { while (currentChars[j] > 0) {
int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
if (currentChars[j] == lowerC || currentChars[j] == c) { if (currentChars[j] == lowerC || currentChars[j] == c) {
int addedWeight = j == 0 ? mTypedLetterMultiplier : 1;
mWord[depth] = c; mWord[depth] = c;
if (mInputLength == inputIndex + 1) { if (mInputLength == inputIndex + 1) {
if (terminal) { if (terminal) {
@ -229,11 +238,12 @@ Dictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int s
} }
if (childrenAddress != 0) { if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, getWordsRec(childrenAddress, depth + 1,
maxDepth, true, snr * addedWeight, inputIndex + 1); maxDepth, true, snr * addedWeight, inputIndex + 1,
diffs + (j > 0));
} }
} else if (childrenAddress != 0) { } else if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, maxDepth, getWordsRec(childrenAddress, depth + 1, maxDepth,
false, snr * addedWeight, inputIndex + 1); false, snr * addedWeight, inputIndex + 1, diffs + (j > 0));
} }
} }
j++; j++;

View File

@ -51,7 +51,7 @@ private:
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c, int depth); unsigned short toLowerCase(unsigned short c, int depth);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency, void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
int inputIndex); int inputIndex, int diffs);
bool isValidWordRec(int pos, unsigned short *word, int offset, int length); bool isValidWordRec(int pos, unsigned short *word, int offset, int length);
unsigned char *mDict; unsigned char *mDict;
@ -67,6 +67,7 @@ private:
int mMaxAlternatives; int mMaxAlternatives;
unsigned short mWord[128]; unsigned short mWord[128];
int mSkipPos; int mSkipPos;
int mMaxEditDistance;
int mFullWordMultiplier; int mFullWordMultiplier;
int mTypedLetterMultiplier; int mTypedLetterMultiplier;

View File

@ -101,10 +101,11 @@ public class BinaryDictionary extends Dictionary {
// completions. // completions.
if (ENABLE_MISSED_CHARACTERS && count < 5) { if (ENABLE_MISSED_CHARACTERS && count < 5) {
for (int skip = 0; skip < codesSize; skip++) { for (int skip = 0; skip < codesSize; skip++) {
count = getSuggestionsNative(mNativeDict, mInputCodes, codesSize, int tempCount = getSuggestionsNative(mNativeDict, mInputCodes, codesSize,
mOutputChars, mFrequencies, mOutputChars, mFrequencies,
MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES, skip); MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES, skip);
if (count > 0) break; count = Math.max(count, tempCount);
if (tempCount > 0) break;
} }
} }