am a503def1: Merge "Support correction conversion from skip to additional proximity"

* commit 'a503def1c26818a1975647d0dc7e4f2492660214':
  Support correction conversion from skip to additional proximity
main
satok 2012-02-03 03:29:19 -08:00 committed by Android Git Automerger
commit 88b31f3133
4 changed files with 54 additions and 15 deletions

View File

@ -210,6 +210,7 @@ bool Correction::initProcessState(const int outputIndex) {
mMatching = false; mMatching = false;
mProximityMatching = false; mProximityMatching = false;
mAdditionalProximityMatching = false;
mTransposing = false; mTransposing = false;
mExceeding = false; mExceeding = false;
mSkipping = false; mSkipping = false;
@ -256,6 +257,7 @@ void Correction::incrementOutputIndex() {
mCorrectionStates[mOutputIndex].mMatching = mMatching; mCorrectionStates[mOutputIndex].mMatching = mMatching;
mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching;
mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching;
mCorrectionStates[mOutputIndex].mTransposing = mTransposing; mCorrectionStates[mOutputIndex].mTransposing = mTransposing;
mCorrectionStates[mOutputIndex].mExceeding = mExceeding; mCorrectionStates[mOutputIndex].mExceeding = mExceeding;
mCorrectionStates[mOutputIndex].mSkipping = mSkipping; mCorrectionStates[mOutputIndex].mSkipping = mSkipping;
@ -304,6 +306,11 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
return type == ProximityInfo::EQUIVALENT_CHAR; return type == ProximityInfo::EQUIVALENT_CHAR;
} }
// Returns true when |type| is ProximityInfo::EQUIVALENT_CHAR or
// ProximityInfo::NEAR_PROXIMITY_CHAR; returns false for every other proximity type.
inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) {
    if (type == ProximityInfo::EQUIVALENT_CHAR) {
        return true;
    }
    return type == ProximityInfo::NEAR_PROXIMITY_CHAR;
}
Correction::CorrectionType Correction::processCharAndCalcState( Correction::CorrectionType Correction::processCharAndCalcState(
const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) {
const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
@ -438,6 +445,9 @@ Correction::CorrectionType Correction::processCharAndCalcState(
if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId if (ProximityInfo::UNRELATED_CHAR == matchedProximityCharId
|| ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) {
mAdditionalProximityMatching = true;
}
// TODO: Optimize // TODO: Optimize
// As the current char turned out to be an unrelated char, // As the current char turned out to be an unrelated char,
// we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex]
@ -479,6 +489,18 @@ Correction::CorrectionType Correction::processCharAndCalcState(
++mSkippedCount; ++mSkippedCount;
--mProximityCount; --mProximityCount;
return processSkipChar(c, isTerminal, false); return processSkipChar(c, isTerminal, false);
} else if (mInputIndex - 1 < mInputLength
&& mSkippedCount > 0
&& mCorrectionStates[mOutputIndex].mSkipping
&& mCorrectionStates[mOutputIndex].mAdditionalProximityMatching
&& isProximityCharOrEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
// Conversion s->a (skip -> additional proximity)
incrementInputIndex();
--mSkippedCount;
mProximityMatching = true;
++mProximityCount;
mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO;
} else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength
&& isEquivalentChar( && isEquivalentChar(
mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) {
@ -666,6 +688,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
int finalFreq = freq; int finalFreq = freq;
if (DEBUG_CORRECTION_FREQ
&& (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
AKLOGI("FinalFreq0: %d", finalFreq);
}
// TODO: Optimize this. // TODO: Optimize this.
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) {
ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength,
@ -681,12 +707,15 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
} }
ed = max(0, ed - quoteDiffCount); ed = max(0, ed - quoteDiffCount);
adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
proximityMatchedCount);
if (transposedCount < 1) { if (transposedCount < 1) {
if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) {
// Promote a word with just one skipped or excessive char // Promote a word with just one skipped or excessive char
if (sameLength) { if (sameLength) {
multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE
+ WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength,
&finalFreq);
} else { } else {
multiplyIntCapped(typedLetterMultiplier, &finalFreq); multiplyIntCapped(typedLetterMultiplier, &finalFreq);
} }
@ -695,8 +724,6 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
sameLength = true; sameLength = true;
} }
} }
adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)),
proximityMatchedCount);
} else { } else {
const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount);
multiplyIntCapped(matchWeight, &finalFreq); multiplyIntCapped(matchWeight, &finalFreq);
@ -744,6 +771,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
&& skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0;
// Score calibration by touch coordinates is being done only for pure-fat finger typing error // Score calibration by touch coordinates is being done only for pure-fat finger typing error
// cases. // cases.
int additionalProximityCount = 0;
// TODO: Remove this constraint. // TODO: Remove this constraint.
if (performTouchPositionCorrection) { if (performTouchPositionCorrection) {
for (int i = 0; i < outputLength; ++i) { for (int i = 0; i < outputLength; ++i) {
@ -776,12 +804,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
} else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) {
multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
} else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
++additionalProximityCount;
multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq);
} }
} }
} else { } else {
// Demote additional proximity characters // Demote additional proximity characters
int additionalProximityCount = 0;
for (int i = 0; i < outputLength; ++i) { for (int i = 0; i < outputLength; ++i) {
const int squaredDistance = correction->mDistances[i]; const int squaredDistance = correction->mDistances[i];
if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) {
@ -803,6 +831,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
} }
} }
// If the user types too many (three or more) proximity characters together with an
// additional proximity character, do not treat the word as a same-length word.
if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3
|| transposedCount > 0 || skipped || excessiveCount > 0)) {
sameLength = false;
}
const int errorCount = adjustedProximityMatchedCount > 0 const int errorCount = adjustedProximityMatchedCount > 0
? adjustedProximityMatchedCount ? adjustedProximityMatchedCount
: (proximityMatchedCount + transposedCount); : (proximityMatchedCount + transposedCount);
@ -813,13 +848,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
if (ed == 0) { if (ed == 0) {
// Full exact match // Full exact match
if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0
&& quoteDiffCount == 0) { && quoteDiffCount == 0 && additionalProximityCount == 0) {
finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq);
} }
} }
// Promote a word with no correction // Promote a word with no correction
if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) { if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0
&& additionalProximityCount == 0) {
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
} }
@ -863,10 +899,11 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const
if (DEBUG_CORRECTION_FREQ if (DEBUG_CORRECTION_FREQ
&& (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) {
DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength);
DUMP_WORD(correction->mWord, outputLength); DUMP_WORD(correction->mWord, outputLength);
AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount,
skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, skippedCount, transposedCount, excessiveCount, additionalProximityCount,
sameLength, quoteDiffCount, ed, finalFreq); outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq);
} }
return finalFreq; return finalFreq;

View File

@ -221,6 +221,7 @@ class Correction {
bool mMatching; bool mMatching;
bool mProximityMatching; bool mProximityMatching;
bool mAdditionalProximityMatching;
bool mExceeding; bool mExceeding;
bool mTransposing; bool mTransposing;
bool mSkipping; bool mSkipping;

View File

@ -47,9 +47,9 @@ struct CorrectionState {
bool mExceeding; bool mExceeding;
bool mSkipping; bool mSkipping;
bool mProximityMatching; bool mProximityMatching;
bool mAdditionalProximityMatching;
bool mNeedsToTraverseAllNodes; bool mNeedsToTraverseAllNodes;
}; };
inline static void initCorrectionState(CorrectionState *state, const int rootPos, inline static void initCorrectionState(CorrectionState *state, const int rootPos,
@ -77,7 +77,7 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos
state->mTransposing = false; state->mTransposing = false;
state->mExceeding = false; state->mExceeding = false;
state->mSkipping = false; state->mSkipping = false;
state->mAdditionalProximityMatching = false;
} }
} // namespace latinime } // namespace latinime

View File

@ -195,9 +195,10 @@ static void prof_out(void) {
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120
#define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90
#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 30 #define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 70
#define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105 #define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160 #define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 148
#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER 3
#define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45
#define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70
#define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96
@ -247,7 +248,7 @@ static void prof_out(void) {
#define NEUTRAL_AREA_RADIUS_RATIO 1.3f #define NEUTRAL_AREA_RADIUS_RATIO 1.3f
// DEBUG // DEBUG
#define INPUTLENGTH_FOR_DEBUG -1 #define INPUTLENGTH_FOR_DEBUG 10
#define MIN_OUTPUT_INDEX_FOR_DEBUG -1 #define MIN_OUTPUT_INDEX_FOR_DEBUG -1
#endif // LATINIME_DEFINES_H #endif // LATINIME_DEFINES_H