am 2580492a: am b5d7857f: Merge "Fix auto-correction threshold and promote full matched words" into honeycomb
* commit '2580492aa05a1fe22e59e7d6ba3296b19e4ae4e7': Fix auto-correction threshold and promote full matched words
This commit is contained in:
commit
241d221344
5 changed files with 19 additions and 9 deletions
|
@@ -294,7 +294,7 @@ public class Suggest implements Dictionary.WordCallback {
|
||||||
typedWord, mSuggestions.get(0), mPriorities[0]);
|
typedWord, mSuggestions.get(0), mPriorities[0]);
|
||||||
if (LatinImeLogger.sDBG) {
|
if (LatinImeLogger.sDBG) {
|
||||||
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
|
Log.d(TAG, "Normalized " + typedWord + "," + mSuggestions.get(0) + ","
|
||||||
+ mPriorities[0] + normalizedScore
|
+ mPriorities[0] + ", " + normalizedScore
|
||||||
+ "(" + mAutoCorrectionThreshold + ")");
|
+ "(" + mAutoCorrectionThreshold + ")");
|
||||||
}
|
}
|
||||||
if (normalizedScore >= mAutoCorrectionThreshold) {
|
if (normalizedScore >= mAutoCorrectionThreshold) {
|
||||||
|
|
|
@@ -275,9 +275,12 @@ public class Utils {
|
||||||
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
||||||
final int beforeLength = before.length();
|
final int beforeLength = before.length();
|
||||||
final int afterLength = after.length();
|
final int afterLength = after.length();
|
||||||
|
if (beforeLength == 0 || afterLength == 0) return 0;
|
||||||
final int distance = editDistance(before, after);
|
final int distance = editDistance(before, after);
|
||||||
|
// If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
|
||||||
|
// correction.
|
||||||
final double maximumScore = MAX_INITIAL_SCORE
|
final double maximumScore = MAX_INITIAL_SCORE
|
||||||
* Math.pow(TYPED_LETTER_MULTIPLIER, beforeLength)
|
* Math.pow(TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength))
|
||||||
* FULL_WORD_MULTIPLYER;
|
* FULL_WORD_MULTIPLYER;
|
||||||
// add a weight based on edit distance.
|
// add a weight based on edit distance.
|
||||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||||
|
|
|
@@ -134,6 +134,7 @@ static void prof_out(void) {
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
|
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
// This is only used for the size of array. Not to be used in c functions.
|
// This is only used for the size of array. Not to be used in c functions.
|
||||||
|
|
|
@@ -347,9 +347,9 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
|
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const bool sameLength) {
|
const int freq, const bool sameLength) {
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
int finalFreq = freq * snr;
|
int finalFreq = freq * snr;
|
||||||
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
||||||
|
@@ -361,6 +361,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
||||||
|
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
||||||
|
if (depth > 1 && lengthFreq == snr) {
|
||||||
|
if (DEBUG_DICT) LOGI("Found full matched word.");
|
||||||
|
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||||
|
}
|
||||||
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
@@ -369,8 +375,8 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
|
||||||
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, const int freq) {
|
const int transposedPos, const int freq) {
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos, excessivePos,
|
||||||
freq, false);
|
transposedPos, freq, false);
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
if (depth >= mInputLength && skipPos < 0) {
|
if (depth >= mInputLength && skipPos < 0) {
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||||
|
@@ -382,7 +388,7 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||||
const int addedWeight) {
|
const int addedWeight) {
|
||||||
if (sameAsTyped(word, depth + 1)) return;
|
if (sameAsTyped(word, depth + 1)) return;
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
|
||||||
excessivePos, transposedPos, freq, true);
|
excessivePos, transposedPos, freq, true);
|
||||||
// Proximity collection will promote a word of the same length as what user typed.
|
// Proximity collection will promote a word of the same length as what user typed.
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
|
|
|
@@ -52,7 +52,7 @@ private:
|
||||||
const int excessivePos, const int transposedPos, int *nextLetters,
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
const int nextLettersSize);
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
|
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
||||||
|
|
Loading…
Reference in a new issue