am 34047d89: Quit ignoring language cost of exact matches.
* commit '34047d8905fbd2cbe4c99618aab105556ebee9ed': Quit ignoring language cost of exact matches.
This commit is contained in:
commit
6df2b0ba3b
4 changed files with 10 additions and 32 deletions
|
@@ -504,6 +504,12 @@ class DicNode {
|
||||||
if (!right->isUsed()) {
|
if (!right->isUsed()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Promote exact matches to prevent them from being pruned.
|
||||||
|
const bool leftExactMatch = isExactMatch();
|
||||||
|
const bool rightExactMatch = right->isExactMatch();
|
||||||
|
if (leftExactMatch != rightExactMatch) {
|
||||||
|
return leftExactMatch;
|
||||||
|
}
|
||||||
const float diff =
|
const float diff =
|
||||||
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
|
right->getNormalizedCompoundDistance() - getNormalizedCompoundDistance();
|
||||||
static const float MIN_DIFF = 0.000001f;
|
static const float MIN_DIFF = 0.000001f;
|
||||||
|
|
|
@@ -36,7 +36,6 @@ namespace latinime {
|
||||||
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
||||||
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
|
const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
|
||||||
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
|
const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
|
||||||
const int Suggest::FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD = 1;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
|
* Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
|
||||||
|
@@ -149,8 +148,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
&doubleLetterTerminalIndex, &doubleLetterLevel);
|
&doubleLetterTerminalIndex, &doubleLetterLevel);
|
||||||
|
|
||||||
int maxScore = S_INT_MIN;
|
int maxScore = S_INT_MIN;
|
||||||
int bestExactMatchedNodeTerminalIndex = -1;
|
|
||||||
int bestExactMatchedNodeOutputWordIndex = -1;
|
|
||||||
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
|
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
|
||||||
// a long multiple words suggestion.
|
// a long multiple words suggestion.
|
||||||
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
|
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
|
||||||
|
@@ -191,7 +188,8 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
// TODO: Better integration with java side autocorrection logic.
|
// TODO: Better integration with java side autocorrection logic.
|
||||||
const int finalScore = SCORING->calculateFinalScore(
|
const int finalScore = SCORING->calculateFinalScore(
|
||||||
compoundDistance, traverseSession->getInputSize(),
|
compoundDistance, traverseSession->getInputSize(),
|
||||||
(forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|
terminalDicNode->isExactMatch()
|
||||||
|
|| (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|
||||||
|| (isValidWord && SCORING->doesAutoCorrectValidWord()));
|
|| (isValidWord && SCORING->doesAutoCorrectValidWord()));
|
||||||
maxScore = max(maxScore, finalScore);
|
maxScore = max(maxScore, finalScore);
|
||||||
|
|
||||||
|
@@ -205,25 +203,6 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
if (isValidWord) {
|
if (isValidWord) {
|
||||||
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
|
outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
|
||||||
frequencies[outputWordIndex] = finalScore;
|
frequencies[outputWordIndex] = finalScore;
|
||||||
if (isSafeExactMatch) {
|
|
||||||
// Demote exact matches that are not the highest probable node among all exact
|
|
||||||
// matches.
|
|
||||||
const bool isBestTerminal = bestExactMatchedNodeTerminalIndex < 0
|
|
||||||
|| terminals[bestExactMatchedNodeTerminalIndex].getProbability()
|
|
||||||
< terminalDicNode->getProbability();
|
|
||||||
const int outputWordIndexToBeDemoted = isBestTerminal ?
|
|
||||||
bestExactMatchedNodeOutputWordIndex : outputWordIndex;
|
|
||||||
if (outputWordIndexToBeDemoted >= 0) {
|
|
||||||
frequencies[outputWordIndexToBeDemoted] -=
|
|
||||||
FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
|
|
||||||
}
|
|
||||||
if (isBestTerminal) {
|
|
||||||
// Updates the best exact matched node index.
|
|
||||||
bestExactMatchedNodeTerminalIndex = terminalIndex;
|
|
||||||
// Updates the best exact matched output word index.
|
|
||||||
bestExactMatchedNodeOutputWordIndex = outputWordIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Populate the outputChars array with the suggested word.
|
// Populate the outputChars array with the suggested word.
|
||||||
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
|
const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
|
||||||
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
|
terminalDicNode->outputResult(&outputCodePoints[startIndex]);
|
||||||
|
|
|
@@ -82,8 +82,6 @@ class Suggest : public SuggestInterface {
|
||||||
|
|
||||||
// Threshold for autocorrection classifier
|
// Threshold for autocorrection classifier
|
||||||
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
|
static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
|
||||||
// Final score penalty to exact match words that are not the most probable exact match.
|
|
||||||
static const int FINAL_SCORE_PENALTY_FOR_NOT_BEST_EXACT_MATCHED_WORD;
|
|
||||||
|
|
||||||
const Traversal *const TRAVERSAL;
|
const Traversal *const TRAVERSAL;
|
||||||
const Scoring *const SCORING;
|
const Scoring *const SCORING;
|
||||||
|
|
|
@@ -169,12 +169,7 @@ class TypingWeighting : public Weighting {
|
||||||
|
|
||||||
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
|
float getTerminalLanguageCost(const DicTraverseSession *const traverseSession,
|
||||||
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
|
const DicNode *const dicNode, const float dicNodeLanguageImprobability) const {
|
||||||
// We promote exact matches here to prevent them from being pruned. The final score of
|
return dicNodeLanguageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
||||||
// exact match nodes might be demoted later in Suggest::outputSuggestions if there are
|
|
||||||
// multiple exact matches.
|
|
||||||
const float languageImprobability = (dicNode->isExactMatch()) ?
|
|
||||||
0.0f : dicNodeLanguageImprobability;
|
|
||||||
return languageImprobability * ScoringParams::DISTANCE_WEIGHT_LANGUAGE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
|
AK_FORCE_INLINE bool needsToNormalizeCompoundDistance() const {
|
||||||
|
|
Loading…
Reference in a new issue