Fix auto-correction threshold and promote full matched words
Bug: 3374359. Bug: 3278422. "zbe" will be auto-corrected to "be" by fixing s-line; "teh" will be auto-corrected to "the" by promotion of full matched words. Change-Id: I314c632820e4e0b1501edeca60ada205d291451f
This commit is contained in:
parent
5c35e4109f
commit
58c49b9132
5 changed files with 19 additions and 9 deletions
|
@ -290,7 +290,7 @@ public class Suggest implements Dictionary.WordCallback {
|
|||
typedWord, mSuggestions.get(0), mPriorities[0]);
|
||||
if (LatinImeLogger.sDBG) {
|
||||
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
|
||||
+ mPriorities[0] + normalizedScore
|
||||
+ mPriorities[0] + ", " + normalizedScore
|
||||
+ "(" + mAutoCorrectionThreshold + ")");
|
||||
}
|
||||
if (normalizedScore >= mAutoCorrectionThreshold) {
|
||||
|
|
|
@ -267,9 +267,12 @@ public class Utils {
|
|||
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
||||
final int beforeLength = before.length();
|
||||
final int afterLength = after.length();
|
||||
if (beforeLength == 0 || afterLength == 0) return 0;
|
||||
final int distance = editDistance(before, after);
|
||||
// If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
|
||||
// correction.
|
||||
final double maximumScore = MAX_INITIAL_SCORE
|
||||
* Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength)
|
||||
* Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength))
|
||||
* FULL_WORD_MULTIPLYER;
|
||||
// add a weight based on edit distance.
|
||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||
|
|
|
@ -134,6 +134,7 @@ static void prof_out(void) {
|
|||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
|
||||
|
||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||
// This is only used for the size of array. Not to be used in c functions.
|
||||
|
|
|
@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
|||
}
|
||||
}
|
||||
|
||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
|
||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||
const bool sameLength) {
|
||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
||||
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
|
||||
const int freq, const bool sameLength) {
|
||||
// TODO: Demote by edit distance
|
||||
int finalFreq = freq * snr;
|
||||
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||
|
@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
|||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||
}
|
||||
}
|
||||
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
||||
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
||||
if (depth > 1 && lengthFreq == snr) {
|
||||
if (DEBUG_DICT) LOGI("Found full matched word.");
|
||||
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||
}
|
||||
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||
return finalFreq;
|
||||
}
|
||||
|
@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
|
|||
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||
const int transposedPos, const int freq) {
|
||||
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
|
||||
freq, false);
|
||||
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
|
||||
transposedPos, freq, false);
|
||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||
if (depth >= mInputLength && skipPos < 0) {
|
||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||
|
@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
|||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||
const int addedWeight) {
|
||||
if (sameAsTyped(word, depth + 1)) return;
|
||||
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
|
||||
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
|
||||
excessivePos, transposedPos, freq, true);
|
||||
// Proximity collection will promote a word of the same length as what user typed.
|
||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||
|
|
|
@ -52,7 +52,7 @@ private:
|
|||
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||
const int nextLettersSize);
|
||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
|
||||
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
||||
|
|
Loading…
Reference in a new issue