Move auto correction threshold to the native code
bug: 5858137
Change-Id: Ic4b6270c6e51ef4ed25a6a1d8ddd7fdfa70fd78d
main
parent
53f56ddef9
commit
be0cf72253
|
@ -118,8 +118,9 @@ public class AutoCorrection {
|
||||||
final int autoCorrectionSuggestionScore = sortedScores[0];
|
final int autoCorrectionSuggestionScore = sortedScores[0];
|
||||||
// TODO: when the normalized score of the first suggestion is nearly equals to
|
// TODO: when the normalized score of the first suggestion is nearly equals to
|
||||||
// the normalized score of the second suggestion, behave less aggressive.
|
// the normalized score of the second suggestion, behave less aggressive.
|
||||||
mNormalizedScore = Utils.calcNormalizedScore(
|
mNormalizedScore = BinaryDictionary.calcNormalizedScore(
|
||||||
typedWord,autoCorrectionSuggestion, autoCorrectionSuggestionScore);
|
typedWord.toString(), autoCorrectionSuggestion.toString(),
|
||||||
|
autoCorrectionSuggestionScore);
|
||||||
if (DBG) {
|
if (DBG) {
|
||||||
Log.d(TAG, "Normalized " + typedWord + "," + autoCorrectionSuggestion + ","
|
Log.d(TAG, "Normalized " + typedWord + "," + autoCorrectionSuggestion + ","
|
||||||
+ autoCorrectionSuggestionScore + ", " + mNormalizedScore
|
+ autoCorrectionSuggestionScore + ", " + mNormalizedScore
|
||||||
|
|
|
@ -118,6 +118,10 @@ public class BinaryDictionary extends Dictionary {
|
||||||
private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
|
private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
|
||||||
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
|
int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
|
||||||
int maxWordLength, int maxBigrams, int maxAlternatives);
|
int maxWordLength, int maxBigrams, int maxAlternatives);
|
||||||
|
private static native double calcNormalizedScoreNative(
|
||||||
|
char[] before, int beforeLength, char[] after, int afterLength, int score);
|
||||||
|
private static native int editDistanceNative(
|
||||||
|
char[] before, int beforeLength, char[] after, int afterLength);
|
||||||
|
|
||||||
private final void loadDictionary(String path, long startOffset, long length) {
|
private final void loadDictionary(String path, long startOffset, long length) {
|
||||||
mNativeDict = openNative(path, startOffset, length,
|
mNativeDict = openNative(path, startOffset, length,
|
||||||
|
@ -211,6 +215,16 @@ public class BinaryDictionary extends Dictionary {
|
||||||
mFlags, outputChars, scores);
|
mFlags, outputChars, scores);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static double calcNormalizedScore(String before, String after, int score) {
|
||||||
|
return calcNormalizedScoreNative(before.toCharArray(), before.length(),
|
||||||
|
after.toCharArray(), after.length(), score);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int editDistance(String before, String after) {
|
||||||
|
return editDistanceNative(
|
||||||
|
before.toCharArray(), before.length(), after.toCharArray(), after.length());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isValidWord(CharSequence word) {
|
public boolean isValidWord(CharSequence word) {
|
||||||
if (word == null) return false;
|
if (word == null) return false;
|
||||||
|
|
|
@ -191,7 +191,8 @@ public class Utils {
|
||||||
final int typedWordLength = typedWord.length();
|
final int typedWordLength = typedWord.length();
|
||||||
final int maxEditDistanceOfNativeDictionary =
|
final int maxEditDistanceOfNativeDictionary =
|
||||||
(typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
|
(typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
|
||||||
final int distance = Utils.editDistance(typedWord, suggestionWord);
|
final int distance = BinaryDictionary.editDistance(
|
||||||
|
typedWord.toString(), suggestionWord.toString());
|
||||||
if (DBG) {
|
if (DBG) {
|
||||||
Log.d(TAG, "Autocorrected edit distance = " + distance
|
Log.d(TAG, "Autocorrected edit distance = " + distance
|
||||||
+ ", " + maxEditDistanceOfNativeDictionary);
|
+ ", " + maxEditDistanceOfNativeDictionary);
|
||||||
|
@ -323,49 +324,6 @@ public class Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Damerau-Levenshtein distance */
|
|
||||||
public static int editDistance(CharSequence s, CharSequence t) {
|
|
||||||
if (s == null || t == null) {
|
|
||||||
throw new IllegalArgumentException("editDistance: Arguments should not be null.");
|
|
||||||
}
|
|
||||||
final int sl = s.length();
|
|
||||||
final int tl = t.length();
|
|
||||||
int[][] dp = new int [sl + 1][tl + 1];
|
|
||||||
for (int i = 0; i <= sl; i++) {
|
|
||||||
dp[i][0] = i;
|
|
||||||
}
|
|
||||||
for (int j = 0; j <= tl; j++) {
|
|
||||||
dp[0][j] = j;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < sl; ++i) {
|
|
||||||
for (int j = 0; j < tl; ++j) {
|
|
||||||
final char sc = Character.toLowerCase(s.charAt(i));
|
|
||||||
final char tc = Character.toLowerCase(t.charAt(j));
|
|
||||||
final int cost = sc == tc ? 0 : 1;
|
|
||||||
dp[i + 1][j + 1] = Math.min(
|
|
||||||
dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost));
|
|
||||||
// Overwrite for transposition cases
|
|
||||||
if (i > 0 && j > 0
|
|
||||||
&& sc == Character.toLowerCase(t.charAt(j - 1))
|
|
||||||
&& tc == Character.toLowerCase(s.charAt(i - 1))) {
|
|
||||||
dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (DBG_EDIT_DISTANCE) {
|
|
||||||
Log.d(TAG, "editDistance:" + s + "," + t);
|
|
||||||
for (int i = 0; i < dp.length; ++i) {
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
for (int j = 0; j < dp[i].length; ++j) {
|
|
||||||
sb.append(dp[i][j]).append(',');
|
|
||||||
}
|
|
||||||
Log.d(TAG, i + ":" + sb.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return dp[sl][tl];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the current stack trace
|
// Get the current stack trace
|
||||||
public static String getStackTrace() {
|
public static String getStackTrace() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
@ -379,55 +337,6 @@ public class Utils {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
// In dictionary.cpp, getSuggestion() method,
|
|
||||||
// suggestion scores are computed using the below formula.
|
|
||||||
// original score
|
|
||||||
// := pow(mTypedLetterMultiplier (this is defined 2),
|
|
||||||
// (the number of matched characters between typed word and suggested word))
|
|
||||||
// * (individual word's score which defined in the unigram dictionary,
|
|
||||||
// and this score is defined in range [0, 255].)
|
|
||||||
// Then, the following processing is applied.
|
|
||||||
// - If the dictionary word is matched up to the point of the user entry
|
|
||||||
// (full match up to min(before.length(), after.length())
|
|
||||||
// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
|
|
||||||
// - If the word is a true full match except for differences in accents or
|
|
||||||
// capitalization, then treat it as if the score was 255.
|
|
||||||
// - If before.length() == after.length()
|
|
||||||
// => multiply by mFullWordMultiplier (this is defined 2))
|
|
||||||
// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
|
|
||||||
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
|
|
||||||
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
|
|
||||||
// the result.
|
|
||||||
// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
|
|
||||||
private static final int MAX_INITIAL_SCORE = 255;
|
|
||||||
private static final int TYPED_LETTER_MULTIPLIER = 2;
|
|
||||||
private static final int FULL_WORD_MULTIPLIER = 2;
|
|
||||||
private static final int S_INT_MAX = 2147483647;
|
|
||||||
public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) {
|
|
||||||
final int beforeLength = before.length();
|
|
||||||
final int afterLength = after.length();
|
|
||||||
if (beforeLength == 0 || afterLength == 0) return 0;
|
|
||||||
final int distance = editDistance(before, after);
|
|
||||||
// If afterLength < beforeLength, the algorithm is suggesting a word by excessive character
|
|
||||||
// correction.
|
|
||||||
int spaceCount = 0;
|
|
||||||
for (int i = 0; i < afterLength; ++i) {
|
|
||||||
if (after.charAt(i) == Keyboard.CODE_SPACE) {
|
|
||||||
++spaceCount;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (spaceCount == afterLength) return 0;
|
|
||||||
final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
|
|
||||||
* Math.pow(
|
|
||||||
TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount))
|
|
||||||
* FULL_WORD_MULTIPLIER;
|
|
||||||
// add a weight based on edit distance.
|
|
||||||
// distance <= max(afterLength, beforeLength) == afterLength,
|
|
||||||
// so, 0 <= distance / afterLength <= 1
|
|
||||||
final double weight = 1.0 - (double) distance / afterLength;
|
|
||||||
return (score / maximumScore) * weight;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class UsabilityStudyLogUtils {
|
public static class UsabilityStudyLogUtils {
|
||||||
private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName();
|
private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName();
|
||||||
private static final String FILENAME = "log.txt";
|
private static final String FILENAME = "log.txt";
|
||||||
|
|
|
@ -270,7 +270,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService
|
||||||
// make the threshold.
|
// make the threshold.
|
||||||
final String wordString = new String(word, wordOffset, wordLength);
|
final String wordString = new String(word, wordOffset, wordLength);
|
||||||
final double normalizedScore =
|
final double normalizedScore =
|
||||||
Utils.calcNormalizedScore(mOriginalText, wordString, score);
|
BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
|
||||||
if (normalizedScore < mSuggestionThreshold) {
|
if (normalizedScore < mSuggestionThreshold) {
|
||||||
if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
|
if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
|
||||||
return true;
|
return true;
|
||||||
|
@ -303,8 +303,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService
|
||||||
hasRecommendedSuggestions = false;
|
hasRecommendedSuggestions = false;
|
||||||
} else {
|
} else {
|
||||||
gatheredSuggestions = EMPTY_STRING_ARRAY;
|
gatheredSuggestions = EMPTY_STRING_ARRAY;
|
||||||
final double normalizedScore =
|
final double normalizedScore = BinaryDictionary.calcNormalizedScore(
|
||||||
Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
|
mOriginalText, mBestSuggestion, mBestScore);
|
||||||
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
|
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -338,7 +338,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService
|
||||||
final int bestScore = mScores[mLength - 1];
|
final int bestScore = mScores[mLength - 1];
|
||||||
final CharSequence bestSuggestion = mSuggestions.get(0);
|
final CharSequence bestSuggestion = mSuggestions.get(0);
|
||||||
final double normalizedScore =
|
final double normalizedScore =
|
||||||
Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
|
BinaryDictionary.calcNormalizedScore(
|
||||||
|
mOriginalText, bestSuggestion.toString(), bestScore);
|
||||||
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
|
hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
|
||||||
if (DBG) {
|
if (DBG) {
|
||||||
Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
|
Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
|
#define LOG_TAG "LatinIME: jni: BinaryDictionary"
|
||||||
|
|
||||||
#include "binary_format.h"
|
#include "binary_format.h"
|
||||||
|
#include "correction.h"
|
||||||
#include "com_android_inputmethod_latin_BinaryDictionary.h"
|
#include "com_android_inputmethod_latin_BinaryDictionary.h"
|
||||||
#include "dictionary.h"
|
#include "dictionary.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
|
@ -188,6 +189,29 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
||||||
|
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
|
||||||
|
jchar *beforeChars = env->GetCharArrayElements(before, 0);
|
||||||
|
jchar *afterChars = env->GetCharArrayElements(after, 0);
|
||||||
|
jdouble result = Correction::RankingAlgorithm::calcNormalizedScore(
|
||||||
|
(unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength,
|
||||||
|
score);
|
||||||
|
env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
|
||||||
|
env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object,
|
||||||
|
jcharArray before, jint beforeLength, jcharArray after, jint afterLength) {
|
||||||
|
jchar *beforeChars = env->GetCharArrayElements(before, 0);
|
||||||
|
jchar *afterChars = env->GetCharArrayElements(after, 0);
|
||||||
|
jint result = Correction::RankingAlgorithm::editDistance(
|
||||||
|
(unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength);
|
||||||
|
env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT);
|
||||||
|
env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
|
static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) {
|
||||||
Dictionary *dictionary = (Dictionary*)dict;
|
Dictionary *dictionary = (Dictionary*)dict;
|
||||||
if (!dictionary) return;
|
if (!dictionary) return;
|
||||||
|
@ -222,7 +246,10 @@ static JNINativeMethod sMethods[] = {
|
||||||
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
{"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close},
|
||||||
{"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
|
{"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions},
|
||||||
{"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
{"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
||||||
{"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}
|
{"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams},
|
||||||
|
{"calcNormalizedScoreNative", "([CI[CII)D",
|
||||||
|
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
||||||
|
{"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance}
|
||||||
};
|
};
|
||||||
|
|
||||||
int register_BinaryDictionary(JNIEnv *env) {
|
int register_BinaryDictionary(JNIEnv *env) {
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
@ -933,14 +934,14 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
|
||||||
return totalFreq;
|
return totalFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0 /* no longer used. keep just for reference */
|
/* Damerau-Levenshtein distance */
|
||||||
inline static int editDistance(
|
inline static int editDistanceInternal(
|
||||||
int* editDistanceTable, const unsigned short* input,
|
int* editDistanceTable, const unsigned short* before,
|
||||||
const int inputLength, const unsigned short* output, const int outputLength) {
|
const int beforeLength, const unsigned short* after, const int afterLength) {
|
||||||
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
// dp[li][lo] dp[a][b] = dp[ a * lo + b]
|
||||||
int* dp = editDistanceTable;
|
int* dp = editDistanceTable;
|
||||||
const int li = inputLength + 1;
|
const int li = beforeLength + 1;
|
||||||
const int lo = outputLength + 1;
|
const int lo = afterLength + 1;
|
||||||
for (int i = 0; i < li; ++i) {
|
for (int i = 0; i < li; ++i) {
|
||||||
dp[lo * i] = i;
|
dp[lo * i] = i;
|
||||||
}
|
}
|
||||||
|
@ -950,13 +951,13 @@ inline static int editDistance(
|
||||||
|
|
||||||
for (int i = 0; i < li - 1; ++i) {
|
for (int i = 0; i < li - 1; ++i) {
|
||||||
for (int j = 0; j < lo - 1; ++j) {
|
for (int j = 0; j < lo - 1; ++j) {
|
||||||
const uint32_t ci = toBaseLowerCase(input[i]);
|
const uint32_t ci = toBaseLowerCase(before[i]);
|
||||||
const uint32_t co = toBaseLowerCase(output[j]);
|
const uint32_t co = toBaseLowerCase(after[j]);
|
||||||
const uint16_t cost = (ci == co) ? 0 : 1;
|
const uint16_t cost = (ci == co) ? 0 : 1;
|
||||||
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1,
|
||||||
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost));
|
||||||
if (i > 0 && j > 0 && ci == toBaseLowerCase(output[j - 1])
|
if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1])
|
||||||
&& co == toBaseLowerCase(input[i - 1])) {
|
&& co == toBaseLowerCase(before[i - 1])) {
|
||||||
dp[(i + 1) * lo + (j + 1)] = min(
|
dp[(i + 1) * lo + (j + 1)] = min(
|
||||||
dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
|
dp[(i + 1) * lo + (j + 1)], dp[(i - 1) * lo + (j - 1)] + cost);
|
||||||
}
|
}
|
||||||
|
@ -964,7 +965,7 @@ inline static int editDistance(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG_EDIT_DISTANCE) {
|
if (DEBUG_EDIT_DISTANCE) {
|
||||||
LOGI("IN = %d, OUT = %d", inputLength, outputLength);
|
LOGI("IN = %d, OUT = %d", beforeLength, afterLength);
|
||||||
for (int i = 0; i < li; ++i) {
|
for (int i = 0; i < li; ++i) {
|
||||||
for (int j = 0; j < lo; ++j) {
|
for (int j = 0; j < lo; ++j) {
|
||||||
LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
|
LOGI("EDIT[%d][%d], %d", i, j, dp[i * lo + j]);
|
||||||
|
@ -973,6 +974,63 @@ inline static int editDistance(
|
||||||
}
|
}
|
||||||
return dp[li * lo - 1];
|
return dp[li * lo - 1];
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
int Correction::RankingAlgorithm::editDistance(const unsigned short* before,
|
||||||
|
const int beforeLength, const unsigned short* after, const int afterLength) {
|
||||||
|
int table[(beforeLength + 1) * (afterLength + 1)];
|
||||||
|
return editDistanceInternal(table, before, beforeLength, after, afterLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// In dictionary.cpp, getSuggestion() method,
|
||||||
|
// suggestion scores are computed using the below formula.
|
||||||
|
// original score
|
||||||
|
// := pow(mTypedLetterMultiplier (this is defined 2),
|
||||||
|
// (the number of matched characters between typed word and suggested word))
|
||||||
|
// * (individual word's score which defined in the unigram dictionary,
|
||||||
|
// and this score is defined in range [0, 255].)
|
||||||
|
// Then, the following processing is applied.
|
||||||
|
// - If the dictionary word is matched up to the point of the user entry
|
||||||
|
// (full match up to min(before.length(), after.length())
|
||||||
|
// => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2)
|
||||||
|
// - If the word is a true full match except for differences in accents or
|
||||||
|
// capitalization, then treat it as if the score was 255.
|
||||||
|
// - If before.length() == after.length()
|
||||||
|
// => multiply by mFullWordMultiplier (this is defined 2))
|
||||||
|
// So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2
|
||||||
|
// For historical reasons we ignore the 1.2 modifier (because the measure for a good
|
||||||
|
// autocorrection threshold was done at a time when it didn't exist). This doesn't change
|
||||||
|
// the result.
|
||||||
|
// So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2.
|
||||||
|
|
||||||
|
/* static */
|
||||||
|
double Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short* before,
|
||||||
|
const int beforeLength, const unsigned short* after, const int afterLength,
|
||||||
|
const int score) {
|
||||||
|
if (0 == beforeLength || 0 == afterLength) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const int distance = editDistance(before, beforeLength, after, afterLength);
|
||||||
|
int spaceCount = 0;
|
||||||
|
for (int i = 0; i < afterLength; ++i) {
|
||||||
|
if (after[i] == CODE_SPACE) {
|
||||||
|
++spaceCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (spaceCount == afterLength) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const double maxScore = score >= S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE
|
||||||
|
* pow((double)TYPED_LETTER_MULTIPLIER,
|
||||||
|
(double)min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER;
|
||||||
|
|
||||||
|
// add a weight based on edit distance.
|
||||||
|
// distance <= max(afterLength, beforeLength) == afterLength,
|
||||||
|
// so, 0 <= distance / afterLength <= 1
|
||||||
|
const double weight = 1.0 - (double) distance / afterLength;
|
||||||
|
return (score / maxScore) * weight;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -95,6 +95,23 @@ class Correction {
|
||||||
return mCorrectionStates[index].mParentIndex;
|
return mCorrectionStates[index].mParentIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class RankingAlgorithm {
|
||||||
|
public:
|
||||||
|
static int calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
|
const int freq, int *editDistanceTable, const Correction* correction);
|
||||||
|
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
||||||
|
const Correction* correction, const unsigned short *word);
|
||||||
|
static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
|
||||||
|
const unsigned short* after, const int afterLength, const int score);
|
||||||
|
static int editDistance(const unsigned short* before,
|
||||||
|
const int beforeLength, const unsigned short* after, const int afterLength);
|
||||||
|
private:
|
||||||
|
static const int CODE_SPACE = ' ';
|
||||||
|
static const int MAX_INITIAL_SCORE = 255;
|
||||||
|
static const int TYPED_LETTER_MULTIPLIER = 2;
|
||||||
|
static const int FULL_WORD_MULTIPLIER = 2;
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
inline void incrementInputIndex();
|
inline void incrementInputIndex();
|
||||||
inline void incrementOutputIndex();
|
inline void incrementOutputIndex();
|
||||||
|
@ -153,13 +170,6 @@ class Correction {
|
||||||
bool mTransposing;
|
bool mTransposing;
|
||||||
bool mSkipping;
|
bool mSkipping;
|
||||||
|
|
||||||
class RankingAlgorithm {
|
|
||||||
public:
|
|
||||||
static int calculateFinalFreq(const int inputIndex, const int depth,
|
|
||||||
const int freq, int *editDistanceTable, const Correction* correction);
|
|
||||||
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
|
|
||||||
const Correction* correction, const unsigned short *word);
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_CORRECTION_H
|
#endif // LATINIME_CORRECTION_H
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class EditDistanceTests extends AndroidTestCase {
|
||||||
* sitting
|
* sitting
|
||||||
*/
|
*/
|
||||||
public void testExample1() {
|
public void testExample1() {
|
||||||
final int dist = Utils.editDistance("kitten", "sitting");
|
final int dist = BinaryDictionary.editDistance("kitten", "sitting");
|
||||||
assertEquals("edit distance between 'kitten' and 'sitting' is 3",
|
assertEquals("edit distance between 'kitten' and 'sitting' is 3",
|
||||||
3, dist);
|
3, dist);
|
||||||
}
|
}
|
||||||
|
@ -50,26 +50,26 @@ public class EditDistanceTests extends AndroidTestCase {
|
||||||
* S--unday
|
* S--unday
|
||||||
*/
|
*/
|
||||||
public void testExample2() {
|
public void testExample2() {
|
||||||
final int dist = Utils.editDistance("Saturday", "Sunday");
|
final int dist = BinaryDictionary.editDistance("Saturday", "Sunday");
|
||||||
assertEquals("edit distance between 'Saturday' and 'Sunday' is 3",
|
assertEquals("edit distance between 'Saturday' and 'Sunday' is 3",
|
||||||
3, dist);
|
3, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testBothEmpty() {
|
public void testBothEmpty() {
|
||||||
final int dist = Utils.editDistance("", "");
|
final int dist = BinaryDictionary.editDistance("", "");
|
||||||
assertEquals("when both string are empty, no edits are needed",
|
assertEquals("when both string are empty, no edits are needed",
|
||||||
0, dist);
|
0, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFirstArgIsEmpty() {
|
public void testFirstArgIsEmpty() {
|
||||||
final int dist = Utils.editDistance("", "aaaa");
|
final int dist = BinaryDictionary.editDistance("", "aaaa");
|
||||||
assertEquals("when only one string of the arguments is empty,"
|
assertEquals("when only one string of the arguments is empty,"
|
||||||
+ " the edit distance is the length of the other.",
|
+ " the edit distance is the length of the other.",
|
||||||
4, dist);
|
4, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSecoondArgIsEmpty() {
|
public void testSecoondArgIsEmpty() {
|
||||||
final int dist = Utils.editDistance("aaaa", "");
|
final int dist = BinaryDictionary.editDistance("aaaa", "");
|
||||||
assertEquals("when only one string of the arguments is empty,"
|
assertEquals("when only one string of the arguments is empty,"
|
||||||
+ " the edit distance is the length of the other.",
|
+ " the edit distance is the length of the other.",
|
||||||
4, dist);
|
4, dist);
|
||||||
|
@ -78,27 +78,27 @@ public class EditDistanceTests extends AndroidTestCase {
|
||||||
public void testSameStrings() {
|
public void testSameStrings() {
|
||||||
final String arg1 = "The quick brown fox jumps over the lazy dog.";
|
final String arg1 = "The quick brown fox jumps over the lazy dog.";
|
||||||
final String arg2 = "The quick brown fox jumps over the lazy dog.";
|
final String arg2 = "The quick brown fox jumps over the lazy dog.";
|
||||||
final int dist = Utils.editDistance(arg1, arg2);
|
final int dist = BinaryDictionary.editDistance(arg1, arg2);
|
||||||
assertEquals("when same strings are passed, distance equals 0.",
|
assertEquals("when same strings are passed, distance equals 0.",
|
||||||
0, dist);
|
0, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSameReference() {
|
public void testSameReference() {
|
||||||
final String arg = "The quick brown fox jumps over the lazy dog.";
|
final String arg = "The quick brown fox jumps over the lazy dog.";
|
||||||
final int dist = Utils.editDistance(arg, arg);
|
final int dist = BinaryDictionary.editDistance(arg, arg);
|
||||||
assertEquals("when same string references are passed, the distance equals 0.",
|
assertEquals("when same string references are passed, the distance equals 0.",
|
||||||
0, dist);
|
0, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNullArg() {
|
public void testNullArg() {
|
||||||
try {
|
try {
|
||||||
Utils.editDistance(null, "aaa");
|
BinaryDictionary.editDistance(null, "aaa");
|
||||||
fail("IllegalArgumentException should be thrown.");
|
fail("IllegalArgumentException should be thrown.");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
assertTrue(e instanceof IllegalArgumentException);
|
assertTrue(e instanceof IllegalArgumentException);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
Utils.editDistance("aaa", null);
|
BinaryDictionary.editDistance("aaa", null);
|
||||||
fail("IllegalArgumentException should be thrown.");
|
fail("IllegalArgumentException should be thrown.");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
assertTrue(e instanceof IllegalArgumentException);
|
assertTrue(e instanceof IllegalArgumentException);
|
||||||
|
|
Loading…
Reference in New Issue