From 58c49b91322847dc453742cb34c2899da9b44479 Mon Sep 17 00:00:00 2001 From: satok Date: Thu, 27 Jan 2011 03:23:39 +0900 Subject: [PATCH] Fix auto-correction threshold and promote full matched words Bug: 3374359 Bug: 3278422 "zbe" will be auto corrected to "be" by fixing s-line "teh" will be auto corrected to "the" by promotion of full matched words Change-Id: I314c632820e4e0b1501edeca60ada205d291451f --- .../com/android/inputmethod/latin/Suggest.java | 2 +- .../com/android/inputmethod/latin/Utils.java | 5 ++++- native/src/defines.h | 1 + native/src/unigram_dictionary.cpp | 18 ++++++++++++------ native/src/unigram_dictionary.h | 2 +- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 1772b2669..d5ed1f83b 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -290,7 +290,7 @@ public class Suggest implements Dictionary.WordCallback { typedWord, mSuggestions.get(0), mPriorities[0]); if (LatinImeLogger.sDBG) { Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + "," - + mPriorities[0] + normalizedScore + + mPriorities[0] + ", " + normalizedScore + "(" + mAutoCorrectionThreshold + ")"); } if (normalizedScore >= mAutoCorrectionThreshold) { diff --git a/java/src/com/android/inputmethod/latin/Utils.java b/java/src/com/android/inputmethod/latin/Utils.java index 5059860d7..53009e4a6 100644 --- a/java/src/com/android/inputmethod/latin/Utils.java +++ b/java/src/com/android/inputmethod/latin/Utils.java @@ -267,9 +267,12 @@ public class Utils { public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) { final int beforeLength = before.length(); final int afterLength = after.length(); + if (beforeLength == 0 || afterLength == 0) return 0; final int distance = editDistance(before, after); + // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character + // correction. final double maximumScore = MAX_INITIAL_SCORE - * Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength) + * Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength)) * FULL_WORD_MULTIPLYER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, diff --git a/native/src/defines.h b/native/src/defines.h index 71aaf28ae..7374526ca 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -134,6 +134,7 @@ static void prof_out(void) { #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60 +#define FULL_MATCHED_WORDS_PROMOTION_RATE 120 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 3f9bcd758..06dd39aaa 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons } } -inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr, - const int skipPos, const int excessivePos, const int transposedPos, const int freq, - const bool sameLength) { +inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, + const int snr, const int skipPos, const int excessivePos, const int transposedPos, + const int freq, const bool sameLength) { // TODO: Demote by edit distance int finalFreq = freq * snr; if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq); @@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); } } + int lengthFreq = TYPED_LETTER_MULTIPLIER; + for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER; + if (depth > 1 && lengthFreq == snr) { + if (DEBUG_DICT) LOGI("Found full matched word."); + multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); + } if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER; return finalFreq; } @@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe unsigned short *word, const int inputIndex, const int depth, const int snr, int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, const int transposedPos, const int freq) { - const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos, - freq, false); + const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos, + transposedPos, freq, false); if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); if (depth >= mInputLength && skipPos < 0) { registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); @@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( const int skipPos, const int excessivePos, const int transposedPos, const int freq, const int addedWeight) { if (sameAsTyped(word, depth + 1)) return; - const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos, + const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos, excessivePos, transposedPos, freq, true); // Proximity collection will promote a word of the same length as what user typed. if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 7f7b7bd21..95f965586 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -52,7 +52,7 @@ private: const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); - int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos, + int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq, const bool sameLength); void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, const int inputIndex, const int depth, const int snr, int *nextLetters,