am ed1ab3b3: Improve accent error handling in exact match logic.

* commit 'ed1ab3b3632d7fd366a33944abf1e2d7f9ad6539': Improve accent error handling in exact match logic.
2014-08-15 09:00:45 +00:00 · 2014-08-15 09:00:45 +00:00 · f44978f4bd
parent 2444dd2974 ed1ab3b363
commit f44978f4bd
4 changed files with 34 additions and 23 deletions
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.cpp
@@ -19,17 +19,18 @@
 namespace latinime {
 const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_CASE = 0x1;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT = 0x2;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT = 0x4;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x8;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x10;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x20;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x40;
-const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x80;
+const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100;
 const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
-        NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
+        NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
 const ErrorTypeUtils::ErrorType
        ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
--- a/native/jni/src/suggest/core/dictionary/error_type_utils.h
+++ b/native/jni/src/suggest/core/dictionary/error_type_utils.h
@@ -30,8 +30,9 @@ class ErrorTypeUtils {
    typedef uint32_t ErrorType;
    static const ErrorType NOT_AN_ERROR;
-    static const ErrorType MATCH_WITH_CASE_ERROR;
+    static const ErrorType MATCH_WITH_WRONG_CASE;
-    static const ErrorType MATCH_WITH_ACCENT_ERROR;
+    static const ErrorType MATCH_WITH_MISSING_ACCENT;
+    static const ErrorType MATCH_WITH_WRONG_ACCENT;
    static const ErrorType MATCH_WITH_DIGRAPH;
    // Treat error as an intentional omission when the CorrectionType is omission and the node can
    // be intentional omission.
--- a/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
+++ b/native/jni/src/suggest/policyimpl/typing/typing_scoring.h
@@ -51,10 +51,10 @@ class TypingScoring : public Scoring {
        }
        if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
            score += ScoringParams::EXACT_MATCH_PROMOTION;
-            if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) {
+            if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
                score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
            }
-            if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) {
+            if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
                score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
            }
            if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -36,25 +36,34 @@ ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType cor
                // Compare the node code point with original primary code point on the keyboard.
                const ProximityInfoState *const pInfoState =
                        traverseSession->getProximityInfoState(0);
-                const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt(
+                const int primaryCodePoint = pInfoState->getPrimaryCodePointAt(
                        dicNode->getInputIndex(0));
                const int nodeCodePoint = dicNode->getNodeCodePoint();
-                if (primaryOriginalCodePoint == nodeCodePoint) {
+                // TODO: Check whether the input code point is on the keyboard.
+                if (primaryCodePoint == nodeCodePoint) {
                    // Node code point is same as original code point on the keyboard.
                    return ErrorTypeUtils::NOT_AN_ERROR;
-                } else if (CharUtils::toLowerCase(primaryOriginalCodePoint) ==
+                } else if (CharUtils::toLowerCase(primaryCodePoint) ==
                        CharUtils::toLowerCase(nodeCodePoint)) {
                    // Only cases of the code points are different.
-                    return ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+                    return ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
-                } else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) ==
+                } else if (primaryCodePoint == CharUtils::toBaseCodePoint(nodeCodePoint)) {
-                        CharUtils::toBaseCodePoint(nodeCodePoint)) {
                    // Node code point is a variant of original code point.
-                    return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR;
+                    return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT;
-                } else {
+                } else if (CharUtils::toBaseCodePoint(primaryCodePoint)
+                        == CharUtils::toBaseCodePoint(nodeCodePoint)) {
+                    // Base code points are the same but the code point is intentionally input.
+                    return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT;
+                } else if (CharUtils::toLowerCase(primaryCodePoint)
+                        == CharUtils::toBaseLowerCase(nodeCodePoint)) {
+                    // Node code point is a variant of original code point and the cases are also
+                    // different.
-                    return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR
+                    return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT
-                            | ErrorTypeUtils::MATCH_WITH_CASE_ERROR;
+                            | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
+                } else {
+                    // Base code points are the same and the cases are different.
+                    return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT
+                            | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
                }
            }
            break;