From 34047d8905fbd2cbe4c99618aab105556ebee9ed Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Thu, 11 Jul 2013 16:56:16 +0900 Subject: [PATCH] Quit ignoring language cost of exact matches. Handle exact matches in DicNode::compare() and calculateFinalScore(). Bug: 8844931 Change-Id: I17c78d4e352b0a4308727229b02a8004b38468bf --- .../jni/src/suggest/core/dicnode/dic_node.h | 6 +++++ native/jni/src/suggest/core/suggest.cpp | 27 +++---------------- native/jni/src/suggest/core/suggest.h | 2 -- .../policyimpl/typing/typing_weighting.h | 7 +---- 4 files changed, 10 insertions(+), 32 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index be40c9d83..973da67e4 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -504,6 +504,12 @@ class DicNode { if (!right->isUsed()) { return false; } + // Promote exact matches to prevent them from being pruned. + const bool leftExactMatch = isExactMatch(); + const bool rightExactMatch = right->isExactMatch(); + if (leftExactMatch != rightExactMatch) { + return leftExactMatch; + } const float diff = right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance(); static const float MIN_DIFF = 0.000001f; diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 6e9aff5ec..c6da6f003 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -36,7 +36,6 @@ namespace latinime { const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; -const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1; /** * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates @@ -149,8 +148,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen &doubleLetterTerminalIndex, &doubleLetterLevel); int maxScore = S_INT_MIN; - int bestExactMatchedNodeTerminalIndex = -1; - int bestExactMatchedNodeOutputWordIndex = -1; // Force autocorrection for obvious long multi-word suggestions when the top suggestion is // a long multiple words suggestion. // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. @@ -191,8 +188,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: Better integration with java side autocorrection logic. const int finalScore = SCORING->calculateFinalScore( compoundDistance, traverseSession->getInputSize(), - (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) - || (isValidWord && SCORING->doesAutoCorrectValidWord())); + terminalDicNode->isExactMatch() + || (forceCommitMultiWords && terminalDicNode->hasMultipleWords()) + || (isValidWord && SCORING->doesAutoCorrectValidWord())); maxScore = max(maxScore, finalScore); // TODO: Implement a smarter auto-commit method for handling multi-word suggestions. @@ -205,25 +203,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen if (isValidWord) { outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; frequencies[outputWordIndex] = finalScore; - if (isSafeExactMatch) { - // Demote exact matches that are not the highest probable node among all exact - // matches. - const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0 - || terminals[bestExactMatchedNodeTerminalIndex].getProbability() - < terminalDicNode->getProbability(); - const int outputWordIndexToBeDemoted = isBestTerminal ? - bestExactMatchedNodeOutputWordIndex : outputWordIndex; - if (outputWordIndexToBeDemoted >= 0) { - frequencies[outputWordIndexToBeDemoted] -= - FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; - } - if (isBestTerminal) { - // Updates the best exact matched node index. - bestExactMatchedNodeTerminalIndex = terminalIndex; - // Updates the best exact matched output word index. - bestExactMatchedNodeOutputWordIndex = outputWordIndex; - } - } // Populate the outputChars array with the suggested word. const int startIndex = outputWordIndex * MAX_WORD_LENGTH; terminalDicNode->outputResult(&outputCodePoints[startIndex]); diff --git a/native/jni/src/suggest/core/suggest.h b/native/jni/src/suggest/core/suggest.h index 752bde9ac..875cbe4e0 100644 --- a/native/jni/src/suggest/core/suggest.h +++ b/native/jni/src/suggest/core/suggest.h @@ -82,8 +82,6 @@ class Suggest : public SuggestInterface { // Threshold for autocorrection classifier static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; - // Final score penalty to exact match words that are not the most probable exact match. - static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD; const Traversal *const TRAVERSAL; const Scoring *const SCORING; diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index e098f353e..830aa80de 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -169,12 +169,7 @@ class TypingWeighting : public Weighting { float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { - // We promote exact matches here to prevent them from being pruned. The final score of - // exact match nodes might be demoted later in Suggest::outputSuggestions if there are - // multiple exact matches. - const float languageImprobability = (dicNode->isExactMatch()) ? - 0.0f : dicNodeLanguageImprobability; - return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; + return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE; } AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {