am 34047d89: Quit ignoring language cost of exact matches.

* commit '34047d8905fbd2cbe4c99618aab105556ebee9ed':
  Quit ignoring language cost of exact matches.
This commit is contained in:
Keisuke Kuroyanagi 2013-07-11 01:43:22 -07:00 committed by Android Git Automerger
commit 6df2b0ba3b
4 changed files with 10 additions and 32 deletions

View file

@@ -504,6 +504,12 @@ class DicNode {
if (!right->isUsed()) { if (!right->isUsed()) {
return false; return false;
} }
// Promote exact matches to prevent them from being pruned.
const bool leftExactMatch = isExactMatch();
const bool rightExactMatch = right->isExactMatch();
if (leftExactMatch != rightExactMatch) {
return leftExactMatch;
}
const float diff = const float diff =
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance(); right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
static const float MIN_DIFF = 0.000001f; static const float MIN_DIFF = 0.000001f;

View file

@@ -36,7 +36,6 @@ namespace latinime {
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2; const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f; const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
/** /**
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@@ -149,8 +148,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
&doubleLetterTerminalIndex, &doubleLetterLevel); &doubleLetterTerminalIndex, &doubleLetterLevel);
int maxScore = S_INT_MIN; int maxScore = S_INT_MIN;
int bestExactMatchedNodeTerminalIndex = -1;
int bestExactMatchedNodeOutputWordIndex = -1;
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
// a long multiple words suggestion. // a long multiple words suggestion.
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions. // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
@@ -191,8 +188,9 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
// TODO: Better integration with java side autocorrection logic. // TODO: Better integration with java side autocorrection logic.
const int finalScore = SCORING->calculateFinalScore( const int finalScore = SCORING->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(), compoundDistance, traverseSession->getInputSize(),
(forceCommitMultiWords && terminalDicNode->hasMultipleWords()) terminalDicNode->isExactMatch()
|| (isValidWord && SCORING->doesAutoCorrectValidWord())); || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|| (isValidWord && SCORING->doesAutoCorrectValidWord()));
maxScore = max(maxScore, finalScore); maxScore = max(maxScore, finalScore);
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions. // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
@@ -205,25 +203,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
if (isValidWord) { if (isValidWord) {
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags; outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
frequencies[outputWordIndex] = finalScore; frequencies[outputWordIndex] = finalScore;
if (isSafeExactMatch) {
// Demote exact matches that are not the highest probable node among all exact
// matches.
const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
|| terminals[bestExactMatchedNodeTerminalIndex].getProbability()
< terminalDicNode->getProbability();
const int outputWordIndexToBeDemoted = isBestTerminal ?
bestExactMatchedNodeOutputWordIndex : outputWordIndex;
if (outputWordIndexToBeDemoted >= 0) {
frequencies[outputWordIndexToBeDemoted] -=
FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
}
if (isBestTerminal) {
// Updates the best exact matched node index.
bestExactMatchedNodeTerminalIndex = terminalIndex;
// Updates the best exact matched output word index.
bestExactMatchedNodeOutputWordIndex = outputWordIndex;
}
}
// Populate the outputChars array with the suggested word. // Populate the outputChars array with the suggested word.
const int startIndex = outputWordIndex * MAX_WORD_LENGTH; const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
terminalDicNode->outputResult(&outputCodePoints[startIndex]); terminalDicNode->outputResult(&outputCodePoints[startIndex]);

View file

@@ -82,8 +82,6 @@ class Suggest : public SuggestInterface {
// Threshold for autocorrection classifier // Threshold for autocorrection classifier
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD; static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
// Final score penalty to exact match words that are not the most probable exact match.
static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
const Traversal *const TRAVERSAL; const Traversal *const TRAVERSAL;
const Scoring *const SCORING; const Scoring *const SCORING;

View file

@@ -169,12 +169,7 @@ class TypingWeighting : public Weighting {
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession, float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const { const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
// We promote exact matches here to prevent them from being pruned. The final score of return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
// exact match nodes might be demoted later in Suggest::outputSuggestions if there are
// multiple exact matches.
const float languageImprobability = (dicNode->isExactMatch()) ?
0.0f : dicNodeLanguageImprobability;
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
} }
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const { AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {