Demote words matched with skipped characters according to the typed word's length.

Words that matched user input with skipped characters used to be demoted by a constant factor in BinaryDictionary, and not at all in the dictionaries implemented in Java code. To reflect the fact that a skipped character has a larger impact the shorter the word is, this change implements a demotion that grows as the typed word gets shorter. The frequency is multiplied by (n - 2) / (n - 1), where n is the length of the typed word, for n >= 2; a two-character input therefore yields a frequency of 0. The demotion is implemented for both BinaryDictionary and the Java dictionaries. Bug: 3340731 Change-Id: I3a18be80a9708981d56a950dc25fe08f018b5b89
2011-03-03 10:22:10 +09:00 · 2011-03-03 10:22:10 +09:00 · 07a8406bc1
parent 50a4cb403f
commit 07a8406bc1
4 changed files with 47 additions and 16 deletions
--- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java
@ -230,6 +230,16 @@ public class ExpandableDictionary extends Dictionary {
        return (node == null) ? -1 : node.mFrequency;
    }
    /**
     * Computes the final frequency of a word that matched the input with a skipped character.
     *
     * The product {@code freq * snr} is scaled by (inputLength - 2) / (inputLength - 1) using
     * integer arithmetic, so the penalty is heavier for shorter input.
     *
     * @param freq the frequency of the word
     * @param snr the multiplier applied to the frequency
     * @param inputLength the length of the input
     * @return the demoted frequency, or 0 when inputLength is below 3
     */
    private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
        // An input length of 2 would scale by 0 / 1, so every length under 3 shares the
        // constant result and only longer input needs the arithmetic.
        if (inputLength < 3) {
            return 0;
        }
        final int weightedFreq = freq * snr;
        final int numerator = inputLength - 2;
        final int denominator = inputLength - 1;
        return (weightedFreq * numerator) / denominator;
    }
    /**
     * Recursively traverse the tree for words that match the input. Input consists of
     * a list of arrays. Each item in the list is one input character position. An input
@ -249,6 +259,7 @@ public class ExpandableDictionary extends Dictionary {
     * inputIndex
     * @param callback the callback class for adding a word
     */
    // TODO: Share this routine with the native code for BinaryDictionary
    protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
            final int depth, boolean completion, int snr, int inputIndex, int skipPos,
            WordCallback callback) {
@ -275,7 +286,13 @@ public class ExpandableDictionary extends Dictionary {
            if (completion) {
                word[depth] = c;
                if (terminal) {
-                    if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
+                    final int finalFreq;
                    if (skipPos < 0) {
                        finalFreq = freq * snr;
                    } else {
                        finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
                    }
                    if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
                            DataType.UNIGRAM)) {
                        return;
                    }
@ -307,8 +324,14 @@ public class ExpandableDictionary extends Dictionary {
                            if (terminal) {
                                if (INCLUDE_TYPED_WORD_IF_VALID
                                        || !same(word, depth + 1, codes.getTypedWord())) {
-                                    int finalFreq = freq * snr * addedAttenuation;
+                                    final int finalFreq;
-                                    if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
+                                    if (skipPos < 0) {
                                        finalFreq = freq * snr * addedAttenuation
                                                * FULL_WORD_FREQ_MULTIPLIER;
                                    } else {
                                        finalFreq = computeSkippedWordFinalFreq(freq,
                                                snr * addedAttenuation, mInputLength);
                                    }
                                    callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
                                            DataType.UNIGRAM);
                                }
--- a/native/src/defines.h
+++ b/native/src/defines.h
@ -135,7 +135,7 @@ static void prof_out(void) {
 #define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
 // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
-#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
+#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 100
 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -457,10 +457,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con
 }
 inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
        const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
-        const int freq, const bool sameLength) {
+        const int freq, const bool sameLength) const {
    // TODO: Demote by edit distance
    int finalFreq = freq * matchWeight;
-    if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
+    if (skipPos >= 0) {
        if (mInputLength >= 3) {
            multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
                    (mInputLength - 2) / (mInputLength - 1), &finalFreq);
        } else {
            finalFreq = 0;
        }
    }
    if (transposedPos >= 0) multiplyRate(
            WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
    if (excessivePos >= 0) {
@ -514,7 +521,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
 }
 inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
-        const int inputLength) {
+        const int inputLength) const {
    if (inputIndex < 0 || inputIndex >= inputLength) return false;
    const int currentChar = *getInputCharsAt(inputIndex);
    const int leftIndex = inputIndex - 1;
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -71,7 +71,8 @@ private:
            const int nextLettersSize);
    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
+            const int excessivePos, const int transposedPos, const int freq,
            const bool sameLength) const;
    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
            const int inputIndex, const int depth, const int snr, int *nextLetters,
            const int nextLettersSize, const int skipPos, const int excessivePos,
@ -95,8 +96,8 @@ private:
    bool processCurrentNodeForExactMatch(const int firstChildPos,
            const int startInputIndex, const int depth, unsigned short *word,
            int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
-    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
+    bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
-    inline const int* getInputCharsAt(const int index) {
+    inline const int* getInputCharsAt(const int index) const {
        return mInputCodes + (index * MAX_PROXIMITY_CHARS);
    }
    const unsigned char *DICT;