Fix auto-correction threshold and promote full matched words
Bug: 3374359
Bug: 3278422

- "zbe" will be auto-corrected to "be" by fixing s-line.
- "teh" will be auto-corrected to "the" by promotion of full matched words.

Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
main
parent
5c35e4109f
commit
58c49b9132
|
@ -290,7 +290,7 @@ public class Suggest implements Dictionary.WordCallback {
|
||||||
typedWord, mSuggestions.get(0), mPriorities[0]);
|
typedWord, mSuggestions.get(0), mPriorities[0]);
|
||||||
if (LatinImeLogger.sDBG) {
|
if (LatinImeLogger.sDBG) {
|
||||||
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
|
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
|
||||||
+ mPriorities[0] + normalizedScore
|
+ mPriorities[0] + ", " + normalizedScore
|
||||||
+ "(" + mAutoCorrectionThreshold + ")");
|
+ "(" + mAutoCorrectionThreshold + ")");
|
||||||
}
|
}
|
||||||
if (normalizedScore >= mAutoCorrectionThreshold) {
|
if (normalizedScore >= mAutoCorrectionThreshold) {
|
||||||
|
|
|
@ -267,9 +267,12 @@ public class Utils {
|
||||||
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
||||||
final int beforeLength = before.length();
|
final int beforeLength = before.length();
|
||||||
final int afterLength = after.length();
|
final int afterLength = after.length();
|
||||||
|
if (beforeLength == 0 || afterLength == 0) return 0;
|
||||||
final int distance = editDistance(before, after);
|
final int distance = editDistance(before, after);
|
||||||
|
// If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
|
||||||
|
// correction.
|
||||||
final double maximumScore = MAX_INITIAL_SCORE
|
final double maximumScore = MAX_INITIAL_SCORE
|
||||||
* Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength)
|
* Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength))
|
||||||
* FULL_WORD_MULTIPLYER;
|
* FULL_WORD_MULTIPLYER;
|
||||||
// add a weight based on edit distance.
|
// add a weight based on edit distance.
|
||||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||||
|
|
|
@ -134,6 +134,7 @@ static void prof_out(void) {
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
|
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
// This is only used for the size of array. Not to be used in c functions.
|
// This is only used for the size of array. Not to be used in c functions.
|
||||||
|
|
|
@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
|
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const bool sameLength) {
|
const int freq, const bool sameLength) {
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
int finalFreq = freq * snr;
|
int finalFreq = freq * snr;
|
||||||
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
|
@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
||||||
|
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
||||||
|
if (depth > 1 && lengthFreq == snr) {
|
||||||
|
if (DEBUG_DICT) LOGI("Found full matched word.");
|
||||||
|
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||||
|
}
|
||||||
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
|
||||||
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, const int freq) {
|
const int transposedPos, const int freq) {
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
|
||||||
freq, false);
|
transposedPos, freq, false);
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
if (depth >= mInputLength && skipPos < 0) {
|
if (depth >= mInputLength && skipPos < 0) {
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||||
|
@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||||
const int addedWeight) {
|
const int addedWeight) {
|
||||||
if (sameAsTyped(word, depth + 1)) return;
|
if (sameAsTyped(word, depth + 1)) return;
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
|
||||||
excessivePos, transposedPos, freq, true);
|
excessivePos, transposedPos, freq, true);
|
||||||
// Proximity collection will promote a word of the same length as what user typed.
|
// Proximity collection will promote a word of the same length as what user typed.
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
|
|
|
@ -52,7 +52,7 @@ private:
|
||||||
const int excessivePos, const int transposedPos, int *nextLetters,
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
const int nextLettersSize);
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
|
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
||||||
|
|
Loading…
Reference in New Issue