Fix auto-correction threshold and promote full matched words

Bug: 3374359
Bug: 3278422

"zbe" will be auto-corrected to "be" by fixing s-line.
"teh" will be auto-corrected to "the" by promotion of full matched words.

Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
2011-01-27 03:23:39 +09:00 · 2011-01-27 03:23:39 +09:00 · 58c49b9132
commit 58c49b9132
parent 5c35e4109f
5 changed files with 19 additions and 9 deletions
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@ -290,7 +290,7 @@ public class Suggest implements Dictionary.WordCallback {
                        typedWord, mSuggestions.get(0), mPriorities[0]);
                if (LatinImeLogger.sDBG) {
                    Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
-                            + mPriorities[0] + normalizedScore
+                            + mPriorities[0] + ", " + normalizedScore
                            + "(" + mAutoCorrectionThreshold + ")");
                }
                if (normalizedScore >= mAutoCorrectionThreshold) {
--- a/java/src/com/android/inputmethod/latin/Utils.java
+++ b/java/src/com/android/inputmethod/latin/Utils.java
@ -267,9 +267,12 @@ public class Utils {
    public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
        final int beforeLength = before.length();
        final int afterLength = after.length();
+        if (beforeLength == 0 || afterLength == 0) return 0;
        final int distance = editDistance(before, after);
+        // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
+        // correction.
        final double maximumScore = MAX_INITIAL_SCORE
-                * Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength)
+                * Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength))
                * FULL_WORD_MULTIPLYER;
        // add a weight based on edit distance.
        // distance <= max(afterLength, beforeLength) == afterLength,
--- a/native/src/defines.h
+++ b/native/src/defines.h
@ -134,6 +134,7 @@ static void prof_out(void) {
 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
+#define FULL_MATCHED_WORDS_PROMOTION_RATE 120

 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
 // This is only used for the size of array. Not to be used in c functions.
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
    }
 }

-inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
-        const int skipPos, const int excessivePos, const int transposedPos, const int freq,
-        const bool sameLength) {
+inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
+        const int snr, const int skipPos, const int excessivePos, const int transposedPos,
+        const int freq, const bool sameLength) {
    // TODO: Demote by edit distance
    int finalFreq = freq * snr;
    if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
            multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
        }
    }
+    int lengthFreq = TYPED_LETTER_MULTIPLIER;
+    for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
+    if (depth > 1 && lengthFreq == snr) {
+        if (DEBUG_DICT) LOGI("Found full matched word.");
+        multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
+    }
    if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
    return finalFreq;
 }
@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
        unsigned short *word, const int inputIndex, const int depth, const int snr,
        int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
        const int transposedPos, const int freq) {
-    const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
-            freq, false);
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
+            transposedPos, freq, false);
    if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
    if (depth >= mInputLength && skipPos < 0) {
        registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
        const int skipPos, const int excessivePos, const int transposedPos, const int freq,
        const int addedWeight) {
    if (sameAsTyped(word, depth + 1)) return;
-    const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
+    const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
            excessivePos, transposedPos, freq, true);
    // Proximity collection will promote a word of the same length as what user typed.
    if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -52,7 +52,7 @@ private:
            const int excessivePos, const int transposedPos, int *nextLetters,
            const int nextLettersSize);
    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
-    int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
+    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
            const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
            const int inputIndex, const int depth, const int snr, int *nextLetters,