Fix auto-correction threshold and promote fully matched words

Bug: 3374359
Bug: 3278422

"zbe" will be auto-corrected to "be" by fixing s-line
"teh" will be auto-corrected to "the" by promoting fully matched words

Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
main
satok 2011-01-27 03:23:39 +09:00
parent 5c35e4109f
commit 58c49b9132
5 changed files with 19 additions and 9 deletions

View File

@ -290,7 +290,7 @@ public class Suggest implements Dictionary.WordCallback {
typedWord, mSuggestions.get(0), mPriorities[0]); typedWord, mSuggestions.get(0), mPriorities[0]);
if (LatinImeLogger.sDBG) { if (LatinImeLogger.sDBG) {
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + "," Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
+ mPriorities[0] + normalizedScore + mPriorities[0] + ", " + normalizedScore
+ "(" + mAutoCorrectionThreshold + ")"); + "(" + mAutoCorrectionThreshold + ")");
} }
if (normalizedScore >= mAutoCorrectionThreshold) { if (normalizedScore >= mAutoCorrectionThreshold) {

View File

@ -267,9 +267,12 @@ public class Utils {
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) { public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
final int beforeLength = before.length(); final int beforeLength = before.length();
final int afterLength = after.length(); final int afterLength = after.length();
if (beforeLength == 0 || afterLength == 0) return 0;
final int distance = editDistance(before, after); final int distance = editDistance(before, after);
// If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
// correction.
final double maximumScore = MAX_INITIAL_SCORE final double maximumScore = MAX_INITIAL_SCORE
* Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength) * Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength))
* FULL_WORD_MULTIPLYER; * FULL_WORD_MULTIPLYER;
// add a weight based on edit distance. // add a weight based on edit distance.
// distance <= max(afterLength, beforeLength) == afterLength, // distance <= max(afterLength, beforeLength) == afterLength,

View File

@ -134,6 +134,7 @@ static void prof_out(void) {
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions. // This is only used for the size of array. Not to be used in c functions.

View File

@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
} }
} }
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr, inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int skipPos, const int excessivePos, const int transposedPos, const int freq, const int snr, const int skipPos, const int excessivePos, const int transposedPos,
const bool sameLength) { const int freq, const bool sameLength) {
// TODO: Demote by edit distance // TODO: Demote by edit distance
int finalFreq = freq * snr; int finalFreq = freq * snr;
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq); if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
} }
} }
int lengthFreq = TYPED_LETTER_MULTIPLIER;
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
if (depth > 1 && lengthFreq == snr) {
if (DEBUG_DICT) LOGI("Found full matched word.");
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
}
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER; if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
return finalFreq; return finalFreq;
} }
@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
unsigned short *word, const int inputIndex, const int depth, const int snr, unsigned short *word, const int inputIndex, const int depth, const int snr,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq) { const int transposedPos, const int freq) {
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos, const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
freq, false); transposedPos, freq, false);
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
if (depth >= mInputLength && skipPos < 0) { if (depth >= mInputLength && skipPos < 0) {
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
const int skipPos, const int excessivePos, const int transposedPos, const int freq, const int skipPos, const int excessivePos, const int transposedPos, const int freq,
const int addedWeight) { const int addedWeight) {
if (sameAsTyped(word, depth + 1)) return; if (sameAsTyped(word, depth + 1)) return;
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos, const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
excessivePos, transposedPos, freq, true); excessivePos, transposedPos, freq, true);
// Proximity collection will promote a word of the same length as what user typed. // Proximity collection will promote a word of the same length as what user typed.
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);

View File

@ -52,7 +52,7 @@ private:
const int excessivePos, const int transposedPos, int *nextLetters, const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize); const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos, int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength); const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int inputIndex, const int depth, const int snr, int *nextLetters, const int inputIndex, const int depth, const int snr, int *nextLetters,