From 1b9fa942b4b62a818e45655dc5097c7eed7a5465 Mon Sep 17 00:00:00 2001 From: satok Date: Thu, 2 Feb 2012 18:49:22 +0900 Subject: [PATCH] Support correction conversion from skip to additional proximity Result: I34bedff6149a6a4e01 Change-Id: I46d528f228a969a0a996299221622627f43c55ec --- native/src/correction.cpp | 57 +++++++++++++++++++++++++++++------ native/src/correction.h | 1 + native/src/correction_state.h | 4 +-- native/src/defines.h | 7 +++-- 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/native/src/correction.cpp b/native/src/correction.cpp index 8275c5d7e..2fc0569fa 100644 --- a/native/src/correction.cpp +++ b/native/src/correction.cpp @@ -210,6 +210,7 @@ bool Correction::initProcessState(const int outputIndex) { mMatching = false; mProximityMatching = false; + mAdditionalProximityMatching = false; mTransposing = false; mExceeding = false; mSkipping = false; @@ -256,6 +257,7 @@ void Correction::incrementOutputIndex() { mCorrectionStates[mOutputIndex].mMatching = mMatching; mCorrectionStates[mOutputIndex].mProximityMatching = mProximityMatching; + mCorrectionStates[mOutputIndex].mAdditionalProximityMatching = mAdditionalProximityMatching; mCorrectionStates[mOutputIndex].mTransposing = mTransposing; mCorrectionStates[mOutputIndex].mExceeding = mExceeding; mCorrectionStates[mOutputIndex].mSkipping = mSkipping; @@ -304,6 +306,11 @@ inline bool isEquivalentChar(ProximityInfo::ProximityType type) { return type == ProximityInfo::EQUIVALENT_CHAR; } +inline bool isProximityCharOrEquivalentChar(ProximityInfo::ProximityType type) { + return type == ProximityInfo::EQUIVALENT_CHAR + || type == ProximityInfo::NEAR_PROXIMITY_CHAR; +} + Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); @@ -438,6 +445,9 @@ Correction::CorrectionType Correction::processCharAndCalcState( if (ProximityInfo::UNRELATED_CHAR == 
matchedProximityCharId || ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + if (ProximityInfo::ADDITIONAL_PROXIMITY_CHAR == matchedProximityCharId) { + mAdditionalProximityMatching = true; + } // TODO: Optimize // As the current char turned out to be an unrelated char, // we will try other correction-types. Please note that mCorrectionStates[mOutputIndex] @@ -479,6 +489,18 @@ Correction::CorrectionType Correction::processCharAndCalcState( ++mSkippedCount; --mProximityCount; return processSkipChar(c, isTerminal, false); + } else if (mInputIndex - 1 < mInputLength + && mSkippedCount > 0 + && mCorrectionStates[mOutputIndex].mSkipping + && mCorrectionStates[mOutputIndex].mAdditionalProximityMatching + && isProximityCharOrEquivalentChar( + mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { + // Conversion s->a + incrementInputIndex(); + --mSkippedCount; + mProximityMatching = true; + ++mProximityCount; + mDistances[mOutputIndex] = ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO; } else if ((mExceeding || mTransposing) && mInputIndex - 1 < mInputLength && isEquivalentChar( mProximityInfo->getMatchedProximityId(mInputIndex + 1, c, false))) { @@ -666,6 +688,10 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int finalFreq = freq; + if (DEBUG_CORRECTION_FREQ + && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + AKLOGI("FinalFreq0: %d", finalFreq); + } // TODO: Optimize this. 
if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, @@ -681,12 +707,15 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } ed = max(0, ed - quoteDiffCount); - + adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), + proximityMatchedCount); if (transposedCount < 1) { if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { // Promote a word with just one skipped or excessive char if (sameLength) { - multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); + multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE + + WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER * outputLength, + &finalFreq); } else { multiplyIntCapped(typedLetterMultiplier, &finalFreq); } @@ -695,8 +724,6 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const sameLength = true; } } - adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), - proximityMatchedCount); } else { const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); multiplyIntCapped(matchWeight, &finalFreq); @@ -744,6 +771,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const && skippedCount == 0 && excessiveCount == 0 && transposedCount == 0; // Score calibration by touch coordinates is being done only for pure-fat finger typing error // cases. + int additionalProximityCount = 0; // TODO: Remove this constraint. 
if (performTouchPositionCorrection) { for (int i = 0; i < outputLength; ++i) { @@ -776,12 +804,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } else if (squaredDistance == PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO) { multiplyRate(WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } else if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { + ++additionalProximityCount; multiplyRate(WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE, &finalFreq); } } } else { // Demote additional proximity characters - int additionalProximityCount = 0; for (int i = 0; i < outputLength; ++i) { const int squaredDistance = correction->mDistances[i]; if (squaredDistance == ADDITIONAL_PROXIMITY_CHAR_DISTANCE_INFO) { @@ -803,6 +831,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } } + // If the user types too many (three or more) proximity characters with additional proximity + // character, do not treat as the same length word. + if (sameLength && additionalProximityCount > 0 && (adjustedProximityMatchedCount >= 3 + || transposedCount > 0 || skipped || excessiveCount > 0)) { + sameLength = false; + } + const int errorCount = adjustedProximityMatchedCount > 0 ? 
adjustedProximityMatchedCount : (proximityMatchedCount + transposedCount); @@ -813,13 +848,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const if (ed == 0) { // Full exact match if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 - && quoteDiffCount == 0) { + && quoteDiffCount == 0 && additionalProximityCount == 0) { finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); } } // Promote a word with no correction - if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0) { + if (proximityMatchedCount == 0 && transposedCount == 0 && !skipped && excessiveCount == 0 + && additionalProximityCount == 0) { multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq); } @@ -863,10 +899,11 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const if (DEBUG_CORRECTION_FREQ && (INPUTLENGTH_FOR_DEBUG <= 0 || INPUTLENGTH_FOR_DEBUG == inputLength)) { + DUMP_WORD(proximityInfo->getPrimaryInputWord(), inputLength); DUMP_WORD(correction->mWord, outputLength); - AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, - skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, - sameLength, quoteDiffCount, ed, finalFreq); + AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d, A%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, + skippedCount, transposedCount, excessiveCount, additionalProximityCount, + outputLength, lastCharExceeded, sameLength, quoteDiffCount, ed, finalFreq); } return finalFreq; diff --git a/native/src/correction.h b/native/src/correction.h index a711c994d..398e7e7bf 100644 --- a/native/src/correction.h +++ b/native/src/correction.h @@ -221,6 +221,7 @@ class Correction { bool mMatching; bool mProximityMatching; + bool mAdditionalProximityMatching; bool mExceeding; bool mTransposing; bool mSkipping; diff --git a/native/src/correction_state.h b/native/src/correction_state.h index 
c04146e54..5b2cbd3a2 100644 --- a/native/src/correction_state.h +++ b/native/src/correction_state.h @@ -47,9 +47,9 @@ struct CorrectionState { bool mExceeding; bool mSkipping; bool mProximityMatching; + bool mAdditionalProximityMatching; bool mNeedsToTraverseAllNodes; - }; inline static void initCorrectionState(CorrectionState *state, const int rootPos, @@ -77,7 +77,7 @@ inline static void initCorrectionState(CorrectionState *state, const int rootPos state->mTransposing = false; state->mExceeding = false; state->mSkipping = false; - + state->mAdditionalProximityMatching = false; } } // namespace latinime diff --git a/native/src/defines.h b/native/src/defines.h index 02c1fe0a2..1e108cb17 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -195,9 +195,10 @@ static void prof_out(void) { #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define FULL_MATCHED_WORDS_PROMOTION_RATE 120 #define WORDS_WITH_PROXIMITY_CHARACTER_DEMOTION_RATE 90 -#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 30 +#define WORDS_WITH_ADDITIONAL_PROXIMITY_CHARACTER_DEMOTION_RATE 70 #define WORDS_WITH_MATCH_SKIP_PROMOTION_RATE 105 -#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 160 +#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE 148 +#define WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_MULTIPLIER 3 #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 @@ -247,7 +248,7 @@ static void prof_out(void) { #define NEUTRAL_AREA_RADIUS_RATIO 1.3f // DEBUG -#define INPUTLENGTH_FOR_DEBUG -1 +#define INPUTLENGTH_FOR_DEBUG 10 #define MIN_OUTPUT_INDEX_FOR_DEBUG -1 #endif // LATINIME_DEFINES_H