Merge "Improve fat finger correction."

main
satok 2011-08-16 09:21:46 -07:00 committed by Android (Google) Code Review
commit 1d66cc1be6
3 changed files with 103 additions and 105 deletions

View File

@ -95,10 +95,8 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen
}
*word = mWord;
const bool sameLength = (mExcessivePos == mInputLength - 1) ? (mInputLength == inputIndex + 2)
: (mInputLength == inputIndex + 1);
return Correction::RankingAlgorithm::calculateFinalFreq(
inputIndex, outputIndex, freq, sameLength, mEditDistanceTable, this);
inputIndex, outputIndex, freq, mEditDistanceTable, this);
}
bool Correction::initProcessState(const int outputIndex) {
@ -205,20 +203,6 @@ Correction::CorrectionType Correction::processCharAndCalcState(
}
if (mNeedsToTraverseAllNodes || isQuote(c)) {
const bool checkProximityChars =
!(mSkippedCount > 0 || mExcessivePos >= 0 || mTransposedPos >= 0);
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
if (checkProximityChars
&& mInputIndex > 0
&& mCorrectionStates[mOutputIndex].mProximityMatching
&& mCorrectionStates[mOutputIndex].mSkipping
&& mProximityInfo->getMatchedProximityId(
mInputIndex - 1, c, false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
++mSkippedCount;
--mProximityCount;
}
return processSkipChar(c, isTerminal);
} else {
int inputIndexForProximity = mInputIndex;
@ -250,6 +234,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
&& mProximityInfo->getMatchedProximityId(
inputIndexForProximity - 1, c, false)
== ProximityInfo::SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR) {
// Note: This logic tries saving cases like contrst --> contrast -- "a" is one of
// proximity chars of "s", but it should rather be handled as a skipped char.
++mSkippedCount;
--mProximityCount;
return processSkipChar(c, isTerminal);
@ -344,6 +330,16 @@ inline static void multiplyRate(const int rate, int *freq) {
}
}
// Returns how many of the first `length` code units of `word` are the
// apostrophe character ('\''). Yields 0 when `length` is zero or negative.
inline static int getQuoteCount(const unsigned short* word, const int length) {
    int apostrophes = 0;
    int pos = 0;
    while (pos < length) {
        const unsigned short current = word[pos];
        ++pos;
        if (current != '\'') {
            continue;
        }
        ++apostrophes;
    }
    return apostrophes;
}
/* static */
inline static int editDistance(
int* editDistanceTable, const unsigned short* input,
@ -392,8 +388,7 @@ inline static int editDistance(
/* static */
int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex,
const int freq, const bool sameLength, int* editDistanceTable,
const Correction* correction) {
const int freq, int* editDistanceTable, const Correction* correction) {
const int excessivePos = correction->getExcessivePos();
const int transposedPos = correction->getTransposedPos();
const int inputLength = correction->mInputLength;
@ -402,6 +397,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
const ProximityInfo *proximityInfo = correction->mProximityInfo;
const int skipCount = correction->mSkippedCount;
const int proximityMatchedCount = correction->mProximityCount;
if (skipCount >= inputLength || inputLength == 0) {
return -1;
}
const bool sameLength = (excessivePos == inputLength - 1) ? (inputLength == inputIndex + 2)
: (inputLength == inputIndex + 1);
// TODO: use mExcessiveCount
int matchCount = inputLength - correction->mProximityCount - (excessivePos >= 0 ? 1 : 0);
@ -409,53 +410,37 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
const unsigned short* word = correction->mWord;
const bool skipped = skipCount > 0;
// ----- TODO: use edit distance here as follows? ---------------------- /
//if (!skipped && excessivePos < 0 && transposedPos < 0) {
// const int ed = editDistance(dp, proximityInfo->getInputWord(),
// inputLength, word, outputIndex + 1);
// matchCount = outputIndex + 1 - ed;
// if (ed == 1 && !sameLength) ++matchCount;
//}
// const int ed = editDistance(dp, proximityInfo->getInputWord(),
// inputLength, word, outputIndex + 1);
// if (ed == 1 && !sameLength) ++matchCount; ------------------------ /
int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1)
- getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength));
// TODO: Calculate edit distance for transposed and excessive
int matchWeight;
int ed = 0;
int adJustedProximityMatchedCount = proximityMatchedCount;
if (excessivePos < 0 && transposedPos < 0 && (proximityMatchedCount > 0 || skipped)) {
const unsigned short* primaryInputWord = proximityInfo->getPrimaryInputWord();
ed = editDistance(editDistanceTable, primaryInputWord,
inputLength, word, outputIndex + 1);
matchWeight = powerIntCapped(typedLetterMultiplier, outputIndex + 1 - ed);
if (ed == 1 && inputLength == outputIndex) {
// Promote a word with just one skipped char
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &matchWeight);
}
ed = max(0, ed - quoteDiffCount);
adJustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)),
proximityMatchedCount);
} else {
matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
}
// TODO: Demote by edit distance
int finalFreq = freq * matchWeight;
// +1 +11/-12
/*if (inputLength == outputIndex && !skipped && excessivePos < 0 && transposedPos < 0) {
const int ed = editDistance(dp, proximityInfo->getInputWord(),
inputLength, word, outputIndex + 1);
if (ed == 1) {
multiplyRate(160, &finalFreq);
}
}*/
if (inputLength == outputIndex && excessivePos < 0 && transposedPos < 0
&& (proximityMatchedCount > 0 || skipped)) {
const int ed = editDistance(editDistanceTable, proximityInfo->getPrimaryInputWord(),
inputLength, word, outputIndex + 1);
if (ed == 1) {
multiplyRate(160, &finalFreq);
}
}
// TODO: Promote properly?
//if (skipCount == 1 && excessivePos < 0 && transposedPos < 0 && inputLength == outputIndex
// && !sameLength) {
// multiplyRate(150, &finalFreq);
//}
//if (skipCount == 0 && excessivePos < 0 && transposedPos < 0 && inputLength == outputIndex
// && !sameLength) {
// multiplyRate(150, &finalFreq);
//}
//if (skipCount == 0 && excessivePos < 0 && transposedPos < 0
// && inputLength == outputIndex + 1) {
// multiplyRate(150, &finalFreq);
//}
///////////////////////////////////////////////
// Promotion and Demotion for each correction
// Demotion for a word with missing character
if (skipped) {
if (inputLength >= 2) {
const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE
* (10 * inputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X)
/ (10 * inputLength
@ -464,12 +449,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
LOGI("Demotion rate for missing character is %d.", demotionRate);
}
multiplyRate(demotionRate, &finalFreq);
} else {
finalFreq = 0;
}
}
// Demotion for a word with transposed character
if (transposedPos >= 0) multiplyRate(
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
// Demotion for a word with excessive character
if (excessivePos >= 0) {
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq);
if (!proximityInfo->existsAdjacentProximityChars(inputIndex)) {
@ -478,34 +464,36 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq);
}
}
int lengthFreq = typedLetterMultiplier;
multiplyIntCapped(powerIntCapped(typedLetterMultiplier, outputIndex), &lengthFreq);
if ((outputIndex + 1) == matchCount) {
// Full exact match
if (outputIndex > 1) {
if (DEBUG_DICT) {
LOGI("Found full matched word.");
}
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
}
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
}
} else if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0
&& outputIndex > 0) {
// Promotion for a word with proximity characters
for (int i = 0; i < adJustedProximityMatchedCount; ++i) {
// A word with proximity corrections
if (DEBUG_DICT) {
LOGI("Found one proximity correction.");
if (DEBUG_DICT_FULL) {
LOGI("Found a proximity correction.");
}
multiplyIntCapped(typedLetterMultiplier, &finalFreq);
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
}
if (DEBUG_DICT_FULL) {
LOGI("calc: %d, %d", outputIndex, sameLength);
}
if (sameLength) multiplyIntCapped(fullWordMultiplier, &finalFreq);
// TODO: check excessive count and transposed count
const int errorCount = proximityMatchedCount + skipCount;
multiplyRate(
100 - CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE * errorCount / inputLength, &finalFreq);
// Promotion for an exactly matched word
if (matchCount == outputIndex + 1) {
// Full exact match
if (sameLength && transposedPos < 0 && !skipped && excessivePos < 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
}
}
// Promote a word with no correction
if (proximityMatchedCount == 0 && transposedPos < 0 && !skipped && excessivePos < 0) {
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
}
// TODO: Check excessive count and transposed count
// TODO: Remove this if possible
/*
If the last character of the user input word is the same as the next character
of the output word, and also all of characters of the user input are matched
@ -524,6 +512,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
multiplyRate(WORDS_WITH_MATCH_SKIP_PROMOTION_RATE, &finalFreq);
}
if (sameLength) {
multiplyIntCapped(fullWordMultiplier, &finalFreq);
}
if (DEBUG_DICT_FULL) {
LOGI("calc: %d, %d", outputIndex, sameLength);
}
return finalFreq;
}

View File

@ -139,8 +139,7 @@ private:
class RankingAlgorithm {
public:
static int calculateFinalFreq(const int inputIndex, const int depth,
const int freq, const bool sameLength, int *editDistanceTable,
const Correction* correction);
const int freq, int *editDistanceTable, const Correction* correction);
static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
const Correction* correction);
};

View File

@ -177,6 +177,8 @@ static void dumpWord(const unsigned short* word, const int length) {
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 42
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
// This is only used for the size of array. Not to be used in c functions.
@ -194,5 +196,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
// Function-like minimum / maximum helpers. Each argument is substituted twice in
// the expansion, so an argument expression with side effects (e.g. i++) may be
// evaluated more than once; pass plain values or side-effect-free expressions.
#define min(a,b) ((a)<(b)?(a):(b))
#define max(a,b) ((a)>(b)?(a):(b))
#endif // LATINIME_DEFINES_H