From cdbbea735f590784791f0c1fe33a514c4e864836 Mon Sep 17 00:00:00 2001 From: satok Date: Wed, 8 Dec 2010 16:04:16 +0900 Subject: [PATCH] Suggest excessive characters bug: 3193883 Change-Id: Iea7a0fce7ce62d8779a7c7e4613d50db30d82b07 --- .../android/inputmethod/latin/Suggest.java | 43 +++---------- native/src/defines.h | 9 ++- native/src/unigram_dictionary.cpp | 63 +++++++++++++------ native/src/unigram_dictionary.h | 17 ++--- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 4c9b7509a..2f188879c 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -34,6 +34,8 @@ import java.util.List; */ public class Suggest implements Dictionary.WordCallback { + public static final String TAG = "Suggest"; + public static final int APPROX_MAX_WORD_LENGTH = 32; public static final int CORRECTION_NONE = 0; @@ -188,33 +190,6 @@ public class Suggest implements Dictionary.WordCallback { } } - private boolean haveSufficientCommonality(String original, CharSequence suggestion) { - final int originalLength = original.length(); - final int suggestionLength = suggestion.length(); - final int minLength = Math.min(originalLength, suggestionLength); - if (minLength <= 2) return true; - int matching = 0; - int lessMatching = 0; // Count matches if we skip one character - int i; - for (i = 0; i < minLength; i++) { - final char origChar = ExpandableDictionary.toLowerCase(original.charAt(i)); - if (origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i))) { - matching++; - lessMatching++; - } else if (i + 1 < suggestionLength - && origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i + 1))) { - lessMatching++; - } - } - matching = Math.max(matching, lessMatching); - - if (minLength <= 4) { - return matching >= 2; - } else { - return matching > minLength / 2; - } - } - /** * Returns a list of words that match the list of character codes passed in. * This list will be overwritten the next time this function is called. @@ -311,6 +286,11 @@ public class Suggest implements Dictionary.WordCallback { // the normalized score of the second suggestion, behave less aggressive. final double normalizedScore = LatinIMEUtil.calcNormalizedScore( mOriginalWord, mSuggestions.get(0), mPriorities[0]); + if (LatinImeLogger.sDBG) { + Log.d(TAG, "Normalized " + mOriginalWord + "," + mSuggestions.get(0) + "," + + mPriorities[0] + normalizedScore + + "(" + mAutoCompleteThreshold + ")"); + } if (normalizedScore >= mAutoCompleteThreshold) { mHaveCorrection = true; } @@ -319,15 +299,6 @@ public class Suggest implements Dictionary.WordCallback { if (mOriginalWord != null) { mSuggestions.add(0, mOriginalWord.toString()); } - - // Check if the first suggestion has a minimum number of characters in common - if (wordComposer.size() > 1 && mSuggestions.size() > 1 - && (mCorrectionMode == CORRECTION_FULL - || mCorrectionMode == CORRECTION_FULL_BIGRAM)) { - if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) { - mHaveCorrection = false; - } - } if (mAutoTextEnabled) { int i = 0; int max = 6; diff --git a/native/src/defines.h b/native/src/defines.h index 8b817fbc1..c4f13e634 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -23,10 +23,12 @@ #ifndef LOG_TAG #define LOG_TAG "LatinIME: " #endif -#define DEBUG_DICT 1 +#define DEBUG_DICT true +#define DEBUG_SHOW_FOUND_WORD false #else // FLAG_DBG #define LOGI -#define DEBUG_DICT 0 +#define DEBUG_DICT false +#define DEBUG_SHOW_FOUND_WORD false #endif // FLAG_DBG // 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words @@ -48,7 +50,8 @@ #define NOT_VALID_WORD -99 #define SUGGEST_MISSING_CHARACTERS true -#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5 + +#define SUGGEST_EXCESSIVE_CHARACTERS true // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index d307ba2f8..d8bb2f030 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -46,19 +46,30 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short initSuggestions(codes, codesSize, outWords, frequencies); - int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, nextLetters, + int suggestedWordsCount = getSuggestionCandidates(codesSize, -1, -1, nextLetters, nextLettersSize); // If there aren't sufficient suggestions, search for words by allowing wild cards at // the different character positions. This feature is not ready for prime-time as we need // to figure out the best ranking for such words compared to proximity corrections and // completions. - if (SUGGEST_MISSING_CHARACTERS && suggestedWordsCount < SUGGEST_MISSING_CHARACTERS_THRESHOLD) { + if (SUGGEST_MISSING_CHARACTERS) { for (int i = 0; i < codesSize; ++i) { - int tempCount = getSuggestionCandidates(codesSize, i, NULL, 0); + if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i); + const int tempCount = getSuggestionCandidates(codesSize, i, -1, NULL, 0); + if (tempCount > suggestedWordsCount) { + suggestedWordsCount = tempCount; + } + } + } + + // Suggest excessive characters + if (SUGGEST_EXCESSIVE_CHARACTERS) { + for (int i = 0; i < codesSize; ++i) { + if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i); + const int tempCount = getSuggestionCandidates(codesSize, -1, i, NULL, 0); if (tempCount > suggestedWordsCount) { suggestedWordsCount = tempCount; - break; } } } @@ -86,14 +97,14 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } -int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, +int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int excessivePos, int *nextLetters, int nextLettersSize) { if (DEBUG_DICT) LOGI("getSuggestionCandidates"); int initialPos = 0; if (IS_LATEST_DICT_VERSION) { initialPos = DICTIONARY_HEADER_SIZE; } - getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize); + getWords(initialPos, inputLength, skipPos, excessivePos, nextLetters, nextLettersSize); // Get the word count int suggestedWordsCount = 0; @@ -115,7 +126,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) if (DEBUG_DICT) { char s[length + 1]; for (int i = 0; i <= length; i++) s[i] = word[i]; - LOGI("Found word = %s, freq = %d : \n", s, frequency); + if (DEBUG_SHOW_FOUND_WORD) LOGI("Found word = %s, freq = %d : \n", s, frequency); } if (length > MAX_WORD_LENGTH) { if (DEBUG_DICT) LOGI("Exceeded max word length."); @@ -132,6 +143,11 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) insertAt++; } if (insertAt < MAX_WORDS) { + if (DEBUG_DICT) { + char s[length + 1]; + for (int i = 0; i <= length; i++) s[i] = word[i]; + LOGI("Added word = %s, freq = %d : \n", s, frequency); + } memmove((char*) mFrequencies + (insertAt + 1) * sizeof(mFrequencies[0]), (char*) mFrequencies + insertAt * sizeof(mFrequencies[0]), (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0])); @@ -181,16 +197,16 @@ static const char QUOTE = '\''; // Keep this for comparing spec to new getWords void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength, const int skipPos, - int *nextLetters, const int nextLettersSize) { + const int excessivePos, int *nextLetters, const int nextLettersSize) { int initialPosition = initialPos; const int count = Dictionary::getCount(DICT, &initialPosition); getWordsRec(count, initialPosition, 0, min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH), - mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize); + mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, nextLetters, nextLettersSize); } void UnigramDictionary::getWords(const int rootPos, const int inputLength, const int skipPos, - int *nextLetters, const int nextLettersSize) { + const int excessivePos, int *nextLetters, const int nextLettersSize) { int rootPosition = rootPos; const int MAX_DEPTH = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); // Get the number of child of root, then increment the position @@ -216,9 +232,9 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const // depth will never be greater than MAX_DEPTH because in that case, // needsToTraverseChildrenNodes should be false const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, - MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, - nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes, &snr, - &inputIndex, &diffs, &siblingPos); + MAX_DEPTH, traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, + nextLetters, nextLettersSize, &childCount, &firstChildPos, &traverseAllNodes, + &snr, &inputIndex, &diffs, &siblingPos); // Next sibling pos mStackSiblingPos[depth] = siblingPos; if (needsToTraverseChildrenNodes) { @@ -232,7 +248,7 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const mStackSiblingPos[depth] = firstChildPos; } } else { - // Goes to parent node + // Goes to parent sibling node --depth; } } @@ -241,7 +257,8 @@ void UnigramDictionary::getWords(const int rootPos, const int inputLength, const // snr : frequency? void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, - const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) { + const int diffs, const int skipPos, const int excessivePos, int *nextLetters, + const int nextLettersSize) { int siblingPos = pos; for (int i = 0; i < childrenCount; ++i) { int newCount; @@ -253,14 +270,16 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons int newDiffs; int newSiblingPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth, - traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize, + traverseAllNodes, snr, inputIndex, diffs, skipPos, excessivePos, nextLetters, + nextLettersSize, &newCount, &newChildPosition, &newTraverseAllNodes, &newSnr, &newInputIndex, &newDiffs, &newSiblingPos); siblingPos = newSiblingPos; if (needsToTraverseChildrenNodes) { getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes, - newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize); + newSnr, newInputIndex, newDiffs, skipPos, excessivePos, nextLetters, + nextLettersSize); } } } @@ -312,14 +331,18 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars, } inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth, - const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, - const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize, - int *newCount, int *newChildPosition, bool *newTraverseAllNodes, + const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex, + const int diffs, const int skipPos, const int excessivePos, int *nextLetters, + const int nextLettersSize, int *newCount, int *newChildPosition, bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) { + if (DEBUG_DICT) assert(skipPos < 0 || excessivePos < 0); unsigned short c; int childPosition; bool terminal; int freq; + + if (excessivePos == depth) ++inputIndex; + *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c, &childPosition, &terminal, &freq); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index c53e77c0d..c3fb38b77 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -31,7 +31,7 @@ public: private: void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies); - int getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters, + int getSuggestionCandidates(int inputLength, int skipPos, int excessivePos, int *nextLetters, int nextLettersSize); void getVersionNumber(); bool checkIfDictVersionIsLatest(); @@ -43,12 +43,12 @@ private: unsigned short toLowerCase(unsigned short c); void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, - const int skipPos, int *nextLetters, const int nextLettersSize); + const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize); void getWords(const int rootPos, const int inputLength, const int skipPos, - int *nextLetters, const int nextLettersSize); + const int excessivePos, int *nextLetters, const int nextLettersSize); // Keep getWordsOld for comparing performance between getWords and getWordsOld void getWordsOld(const int initialPos, const int inputLength, const int skipPos, - int *nextLetters, const int nextLettersSize); + const int excessivePos, int *nextLetters, const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, const int mInputLength, const int depth, const int snr, int *nextLetters, @@ -59,10 +59,11 @@ private: const int inputIndex, const int skipPos, const int depth); int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos); bool processCurrentNode(const int pos, const int depth, - const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, - const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize, - int *newCount, int *newChildPosition, bool *newTraverseAllNodes, - int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition); + const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex, + const int diffs, const int skipPos, const int excessivePos, int *nextLetters, + const int nextLettersSize, int *newCount, int *newChildPosition, + bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs, + int *nextSiblingPosition); const unsigned char *DICT; const int MAX_WORDS; const int MAX_WORD_LENGTH;