Improve accent error handling in exact match logic.

Bug: 16963362
Change-Id: I50a48806bc3147388d165faab6b572415a851f29
main
Keisuke Kuroyanagi 2014-08-15 15:06:39 +09:00
parent 5409957440
commit ed1ab3b363
4 changed files with 34 additions and 23 deletions

View File

@ -19,17 +19,18 @@
namespace latinime { namespace latinime {
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0; const ErrorTypeUtils::ErrorType ErrorTypeUtils::NOT_AN_ERROR = 0x0;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_CASE_ERROR = 0x1; const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_CASE = 0x1;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR = 0x2; const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT = 0x2;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x4; const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT = 0x4;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x8; const ErrorTypeUtils::ErrorType ErrorTypeUtils::MATCH_WITH_DIGRAPH = 0x8;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x10; const ErrorTypeUtils::ErrorType ErrorTypeUtils::INTENTIONAL_OMISSION = 0x10;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x20; const ErrorTypeUtils::ErrorType ErrorTypeUtils::EDIT_CORRECTION = 0x20;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x40; const ErrorTypeUtils::ErrorType ErrorTypeUtils::PROXIMITY_CORRECTION = 0x40;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80; const ErrorTypeUtils::ErrorType ErrorTypeUtils::COMPLETION = 0x80;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x100;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH = const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH; NOT_AN_ERROR | MATCH_WITH_WRONG_CASE | MATCH_WITH_MISSING_ACCENT | MATCH_WITH_DIGRAPH;
const ErrorTypeUtils::ErrorType const ErrorTypeUtils::ErrorType
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =

View File

@ -30,8 +30,9 @@ class ErrorTypeUtils {
typedef uint32_t ErrorType; typedef uint32_t ErrorType;
static const ErrorType NOT_AN_ERROR; static const ErrorType NOT_AN_ERROR;
static const ErrorType MATCH_WITH_CASE_ERROR; static const ErrorType MATCH_WITH_WRONG_CASE;
static const ErrorType MATCH_WITH_ACCENT_ERROR; static const ErrorType MATCH_WITH_MISSING_ACCENT;
static const ErrorType MATCH_WITH_WRONG_ACCENT;
static const ErrorType MATCH_WITH_DIGRAPH; static const ErrorType MATCH_WITH_DIGRAPH;
// Treat error as an intentional omission when the CorrectionType is omission and the node can // Treat error as an intentional omission when the CorrectionType is omission and the node can
// be intentional omission. // be intentional omission.

View File

@ -51,10 +51,10 @@ class TypingScoring : public Scoring {
} }
if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) { if (boostExactMatches && ErrorTypeUtils::isExactMatch(containedErrorTypes)) {
score += ScoringParams::EXACT_MATCH_PROMOTION; score += ScoringParams::EXACT_MATCH_PROMOTION;
if ((ErrorTypeUtils::MATCH_WITH_CASE_ERROR & containedErrorTypes) != 0) { if ((ErrorTypeUtils::MATCH_WITH_WRONG_CASE & containedErrorTypes) != 0) {
score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH; score -= ScoringParams::CASE_ERROR_PENALTY_FOR_EXACT_MATCH;
} }
if ((ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR & containedErrorTypes) != 0) { if ((ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT & containedErrorTypes) != 0) {
score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH; score -= ScoringParams::ACCENT_ERROR_PENALTY_FOR_EXACT_MATCH;
} }
if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) { if ((ErrorTypeUtils::MATCH_WITH_DIGRAPH & containedErrorTypes) != 0) {

View File

@ -36,25 +36,34 @@ ErrorTypeUtils::ErrorType TypingWeighting::getErrorType(const CorrectionType cor
// Compare the node code point with original primary code point on the keyboard. // Compare the node code point with original primary code point on the keyboard.
const ProximityInfoState *const pInfoState = const ProximityInfoState *const pInfoState =
traverseSession->getProximityInfoState(0); traverseSession->getProximityInfoState(0);
const int primaryOriginalCodePoint = pInfoState->getPrimaryOriginalCodePointAt( const int primaryCodePoint = pInfoState->getPrimaryCodePointAt(
dicNode->getInputIndex(0)); dicNode->getInputIndex(0));
const int nodeCodePoint = dicNode->getNodeCodePoint(); const int nodeCodePoint = dicNode->getNodeCodePoint();
if (primaryOriginalCodePoint == nodeCodePoint) { // TODO: Check whether the input code point is on the keyboard.
if (primaryCodePoint == nodeCodePoint) {
// Node code point is same as original code point on the keyboard. // Node code point is same as original code point on the keyboard.
return ErrorTypeUtils::NOT_AN_ERROR; return ErrorTypeUtils::NOT_AN_ERROR;
} else if (CharUtils::toLowerCase(primaryOriginalCodePoint) == } else if (CharUtils::toLowerCase(primaryCodePoint) ==
CharUtils::toLowerCase(nodeCodePoint)) { CharUtils::toLowerCase(nodeCodePoint)) {
// Only cases of the code points are different. // Only cases of the code points are different.
return ErrorTypeUtils::MATCH_WITH_CASE_ERROR; return ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
} else if (CharUtils::toBaseCodePoint(primaryOriginalCodePoint) == } else if (primaryCodePoint == CharUtils::toBaseCodePoint(nodeCodePoint)) {
CharUtils::toBaseCodePoint(nodeCodePoint)) {
// Node code point is a variant of original code point. // Node code point is a variant of original code point.
return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR; return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT;
} else { } else if (CharUtils::toBaseCodePoint(primaryCodePoint)
== CharUtils::toBaseCodePoint(nodeCodePoint)) {
// Base code points are the same but the code point is intentionally input.
return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT;
} else if (CharUtils::toLowerCase(primaryCodePoint)
== CharUtils::toBaseLowerCase(nodeCodePoint)) {
// Node code point is a variant of original code point and the cases are also // Node code point is a variant of original code point and the cases are also
// different. // different.
return ErrorTypeUtils::MATCH_WITH_ACCENT_ERROR return ErrorTypeUtils::MATCH_WITH_MISSING_ACCENT
| ErrorTypeUtils::MATCH_WITH_CASE_ERROR; | ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
} else {
// Base code points are the same and the cases are different.
return ErrorTypeUtils::MATCH_WITH_WRONG_ACCENT
| ErrorTypeUtils::MATCH_WITH_WRONG_CASE;
} }
} }
break; break;