Split SuggestionsOutputUtils::outputSuggestions.
Bug: 13333066 Change-Id: Ie5e513dacdc5502e7263ddf709824bbd6bc6b74a
This commit is contained in:
parent
a0ea92f76c
commit
ff1b3947c6
2 changed files with 86 additions and 74 deletions
|
@ -17,11 +17,11 @@
|
||||||
#include "suggest/core/result/suggestions_output_utils.h"
|
#include "suggest/core/result/suggestions_output_utils.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_utils.h"
|
#include "suggest/core/dicnode/dic_node_utils.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
|
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
|
||||||
#include "suggest/core/dictionary/dictionary.h"
|
|
||||||
#include "suggest/core/dictionary/error_type_utils.h"
|
#include "suggest/core/dictionary/error_type_utils.h"
|
||||||
#include "suggest/core/policy/scoring.h"
|
#include "suggest/core/policy/scoring.h"
|
||||||
#include "suggest/core/result/suggestion_results.h"
|
#include "suggest/core/result/suggestion_results.h"
|
||||||
|
@ -31,105 +31,113 @@ namespace latinime {
|
||||||
|
|
||||||
const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
||||||
|
|
||||||
// TODO: Split this method.
|
|
||||||
/* static */ void SuggestionsOutputUtils::outputSuggestions(
|
/* static */ void SuggestionsOutputUtils::outputSuggestions(
|
||||||
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
|
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
|
||||||
SuggestionResults *const outSuggestionResults) {
|
SuggestionResults *const outSuggestionResults) {
|
||||||
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
|
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
|
||||||
const int terminalSize = 0;
|
const int terminalSize = 0;
|
||||||
#else
|
#else
|
||||||
const int terminalSize = std::min(MAX_RESULTS,
|
const int terminalSize = traverseSession->getDicTraverseCache()->terminalSize();
|
||||||
static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
|
|
||||||
#endif
|
#endif
|
||||||
DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
|
std::vector<DicNode> terminals(terminalSize);
|
||||||
|
|
||||||
for (int index = terminalSize - 1; index >= 0; --index) {
|
for (int index = terminalSize - 1; index >= 0; --index) {
|
||||||
traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
|
traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const float languageWeight = scoringPolicy->getAdjustedLanguageWeight(
|
const float languageWeight = scoringPolicy->getAdjustedLanguageWeight(
|
||||||
traverseSession, terminals, terminalSize);
|
traverseSession, terminals.data(), terminalSize);
|
||||||
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
|
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
|
||||||
// a long multiple words suggestion.
|
// a long multiple words suggestion.
|
||||||
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
|
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
|
||||||
const bool forceCommitMultiWords = scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
|
const bool forceCommitMultiWords = scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
|
||||||
&& (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
|
&& (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
|
||||||
&& terminals[0].hasMultipleWords());
|
&& !terminals.empty() && terminals.front().hasMultipleWords());
|
||||||
// TODO: have partial commit work even with multiple pointers.
|
// TODO: have partial commit work even with multiple pointers.
|
||||||
const bool outputSecondWordFirstLetterInputIndex =
|
const bool outputSecondWordFirstLetterInputIndex =
|
||||||
traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
|
traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
|
||||||
const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()->
|
const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()->
|
||||||
getHeaderStructurePolicy()->shouldBoostExactMatches();
|
getHeaderStructurePolicy()->shouldBoostExactMatches();
|
||||||
|
|
||||||
int codePoints[MAX_WORD_LENGTH];
|
|
||||||
// Output suggestion results here
|
// Output suggestion results here
|
||||||
for (int terminalIndex = 0; terminalIndex < terminalSize; ++terminalIndex) {
|
for (auto &terminalDicNode : terminals) {
|
||||||
DicNode *terminalDicNode = &terminals[terminalIndex];
|
outputSuggestionsOfDicNode(scoringPolicy, traverseSession, &terminalDicNode,
|
||||||
if (DEBUG_GEO_FULL) {
|
languageWeight, boostExactMatches, forceCommitMultiWords,
|
||||||
terminalDicNode->dump("OUT:");
|
outputSecondWordFirstLetterInputIndex, outSuggestionResults);
|
||||||
}
|
|
||||||
const float doubleLetterCost =
|
|
||||||
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
|
|
||||||
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
|
|
||||||
+ doubleLetterCost;
|
|
||||||
const bool isPossiblyOffensiveWord =
|
|
||||||
traverseSession->getDictionaryStructurePolicy()->getProbability(
|
|
||||||
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
|
|
||||||
const bool isExactMatch =
|
|
||||||
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
|
|
||||||
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
|
|
||||||
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
|
|
||||||
// (e.g. "AMD" and "and")
|
|
||||||
const bool isSafeExactMatch = isExactMatch
|
|
||||||
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
|
|
||||||
const int outputTypeFlags =
|
|
||||||
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
|
|
||||||
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
|
|
||||||
|
|
||||||
// Entries that are blacklisted or do not represent a word should not be output.
|
|
||||||
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
|
|
||||||
|
|
||||||
// Increase output score of top typing suggestion to ensure autocorrection.
|
|
||||||
// TODO: Better integration with java side autocorrection logic.
|
|
||||||
const int finalScore = scoringPolicy->calculateFinalScore(
|
|
||||||
compoundDistance, traverseSession->getInputSize(),
|
|
||||||
terminalDicNode->getContainedErrorTypes(),
|
|
||||||
(forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|
|
||||||
|| (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
|
|
||||||
boostExactMatches);
|
|
||||||
|
|
||||||
// Don't output invalid words. However, we still need to submit their shortcuts if any.
|
|
||||||
if (isValidWord) {
|
|
||||||
terminalDicNode->outputResult(codePoints);
|
|
||||||
const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
|
|
||||||
terminalDicNode->getSecondWordFirstInputIndex(
|
|
||||||
traverseSession->getProximityInfoState(0)) :
|
|
||||||
NOT_AN_INDEX;
|
|
||||||
outSuggestionResults->addSuggestion(codePoints,
|
|
||||||
terminalDicNode->getTotalNodeCodePointCount(),
|
|
||||||
finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
|
|
||||||
indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!terminalDicNode->hasMultipleWords()) {
|
|
||||||
BinaryDictionaryShortcutIterator shortcutIt(
|
|
||||||
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
|
||||||
traverseSession->getDictionaryStructurePolicy()
|
|
||||||
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
|
|
||||||
// Shortcut is not supported for multiple words suggestions.
|
|
||||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
|
||||||
const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
|
|
||||||
const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
|
|
||||||
scoringPolicy->calculateFinalScore(compoundDistance,
|
|
||||||
traverseSession->getInputSize(),
|
|
||||||
terminalDicNode->getContainedErrorTypes(),
|
|
||||||
true /* forceCommit */, boostExactMatches) : finalScore;
|
|
||||||
outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
scoringPolicy->getMostProbableString(traverseSession, languageWeight, outSuggestionResults);
|
scoringPolicy->getMostProbableString(traverseSession, languageWeight, outSuggestionResults);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
|
||||||
|
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
|
||||||
|
const DicNode *const terminalDicNode, const float languageWeight,
|
||||||
|
const bool boostExactMatches, const bool forceCommitMultiWords,
|
||||||
|
const bool outputSecondWordFirstLetterInputIndex,
|
||||||
|
SuggestionResults *const outSuggestionResults) {
|
||||||
|
if (DEBUG_GEO_FULL) {
|
||||||
|
terminalDicNode->dump("OUT:");
|
||||||
|
}
|
||||||
|
const float doubleLetterCost =
|
||||||
|
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
|
||||||
|
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
|
||||||
|
+ doubleLetterCost;
|
||||||
|
const bool isPossiblyOffensiveWord =
|
||||||
|
traverseSession->getDictionaryStructurePolicy()->getProbability(
|
||||||
|
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
|
||||||
|
const bool isExactMatch =
|
||||||
|
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
|
||||||
|
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
|
||||||
|
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
|
||||||
|
// (e.g. "AMD" and "and")
|
||||||
|
const bool isSafeExactMatch = isExactMatch
|
||||||
|
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
|
||||||
|
const int outputTypeFlags =
|
||||||
|
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
|
||||||
|
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
|
||||||
|
|
||||||
|
// Entries that are blacklisted or do not represent a word should not be output.
|
||||||
|
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
|
||||||
|
|
||||||
|
// Increase output score of top typing suggestion to ensure autocorrection.
|
||||||
|
// TODO: Better integration with java side autocorrection logic.
|
||||||
|
const int finalScore = scoringPolicy->calculateFinalScore(
|
||||||
|
compoundDistance, traverseSession->getInputSize(),
|
||||||
|
terminalDicNode->getContainedErrorTypes(),
|
||||||
|
(forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|
||||||
|
|| (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
|
||||||
|
boostExactMatches);
|
||||||
|
|
||||||
|
// Don't output invalid words. However, we still need to submit their shortcuts if any.
|
||||||
|
if (isValidWord) {
|
||||||
|
int codePoints[MAX_WORD_LENGTH];
|
||||||
|
terminalDicNode->outputResult(codePoints);
|
||||||
|
const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
|
||||||
|
terminalDicNode->getSecondWordFirstInputIndex(
|
||||||
|
traverseSession->getProximityInfoState(0)) :
|
||||||
|
NOT_AN_INDEX;
|
||||||
|
outSuggestionResults->addSuggestion(codePoints,
|
||||||
|
terminalDicNode->getTotalNodeCodePointCount(),
|
||||||
|
finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
|
||||||
|
indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output shortcuts.
|
||||||
|
// Shortcut is not supported for multiple words suggestions.
|
||||||
|
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||||
|
if (!terminalDicNode->hasMultipleWords()) {
|
||||||
|
BinaryDictionaryShortcutIterator shortcutIt(
|
||||||
|
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
||||||
|
traverseSession->getDictionaryStructurePolicy()
|
||||||
|
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
|
||||||
|
const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
|
||||||
|
const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
|
||||||
|
scoringPolicy->calculateFinalScore(compoundDistance,
|
||||||
|
traverseSession->getInputSize(),
|
||||||
|
terminalDicNode->getContainedErrorTypes(),
|
||||||
|
true /* forceCommit */, boostExactMatches) : finalScore;
|
||||||
|
outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
|
/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
|
||||||
const DicNode *const terminalDicNode) {
|
const DicNode *const terminalDicNode) {
|
||||||
// Get the number of spaces in the first suggestion
|
// Get the number of spaces in the first suggestion
|
||||||
|
|
|
@ -41,11 +41,15 @@ class SuggestionsOutputUtils {
|
||||||
// Inputs longer than this will autocorrect if the suggestion is multi-word
|
// Inputs longer than this will autocorrect if the suggestion is multi-word
|
||||||
static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
|
static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
|
||||||
|
|
||||||
static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
|
static void outputSuggestionsOfDicNode(const Scoring *const scoringPolicy,
|
||||||
|
DicTraverseSession *traverseSession, const DicNode *const terminalDicNode,
|
||||||
|
const float languageWeight, const bool boostExactMatches,
|
||||||
|
const bool forceCommitMultiWords, const bool outputSecondWordFirstLetterInputIndex,
|
||||||
|
SuggestionResults *const outSuggestionResults);
|
||||||
static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
|
static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
|
||||||
const int finalScore, const bool sameAsTyped,
|
const int finalScore, const bool sameAsTyped,
|
||||||
SuggestionResults *const outSuggestionResults);
|
SuggestionResults *const outSuggestionResults);
|
||||||
|
static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
|
#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
|
||||||
|
|
Loading…
Reference in a new issue