Split SuggestionsOutputUtils::outputSuggestions.

Bug: 13333066
Change-Id: Ie5e513dacdc5502e7263ddf709824bbd6bc6b74a
This commit is contained in:
Keisuke Kuroyanagi 2014-03-28 12:35:45 +09:00
parent a0ea92f76c
commit ff1b3947c6
2 changed files with 86 additions and 74 deletions

View file

@ -17,11 +17,11 @@
#include "suggest/core/result/suggestions_output_utils.h"
#include <algorithm>
#include <vector>
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/dictionary.h"
#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/policy/scoring.h"
#include "suggest/core/result/suggestion_results.h"
@ -31,105 +31,113 @@ namespace latinime {
const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
// TODO: Split this method.
/* static */ void SuggestionsOutputUtils::outputSuggestions(
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
SuggestionResults *const outSuggestionResults) {
#if DEBUG_EVALUATE_MOST_PROBABLE_STRING
const int terminalSize = 0;
#else
const int terminalSize = std::min(MAX_RESULTS,
static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
const int terminalSize = traverseSession->getDicTraverseCache()->terminalSize();
#endif
DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
std::vector<DicNode> terminals(terminalSize);
for (int index = terminalSize - 1; index >= 0; --index) {
traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
}
const float languageWeight = scoringPolicy->getAdjustedLanguageWeight(
traverseSession, terminals, terminalSize);
traverseSession, terminals.data(), terminalSize);
// Force autocorrection for obvious long multi-word suggestions when the top suggestion is
// a long multiple words suggestion.
// TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
const bool forceCommitMultiWords = scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
&& (traverseSession->getInputSize() >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
&& terminals[0].hasMultipleWords());
&& !terminals.empty() && terminals.front().hasMultipleWords());
// TODO: have partial commit work even with multiple pointers.
const bool outputSecondWordFirstLetterInputIndex =
traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
const bool boostExactMatches = traverseSession->getDictionaryStructurePolicy()->
getHeaderStructurePolicy()->shouldBoostExactMatches();
int codePoints[MAX_WORD_LENGTH];
// Output suggestion results here
for (int terminalIndex = 0; terminalIndex < terminalSize; ++terminalIndex) {
DicNode *terminalDicNode = &terminals[terminalIndex];
if (DEBUG_GEO_FULL) {
terminalDicNode->dump("OUT:");
}
const float doubleLetterCost =
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
const bool isPossiblyOffensiveWord =
traverseSession->getDictionaryStructurePolicy()->getProbability(
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and")
const bool isSafeExactMatch = isExactMatch
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
const int finalScore = scoringPolicy->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
(forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|| (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
boostExactMatches);
// Don't output invalid words. However, we still need to submit their shortcuts if any.
if (isValidWord) {
terminalDicNode->outputResult(codePoints);
const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
terminalDicNode->getSecondWordFirstInputIndex(
traverseSession->getProximityInfoState(0)) :
NOT_AN_INDEX;
outSuggestionResults->addSuggestion(codePoints,
terminalDicNode->getTotalNodeCodePointCount(),
finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
}
if (!terminalDicNode->hasMultipleWords()) {
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
scoringPolicy->calculateFinalScore(compoundDistance,
traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
true /* forceCommit */, boostExactMatches) : finalScore;
outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
}
for (auto &terminalDicNode : terminals) {
outputSuggestionsOfDicNode(scoringPolicy, traverseSession, &terminalDicNode,
languageWeight, boostExactMatches, forceCommitMultiWords,
outputSecondWordFirstLetterInputIndex, outSuggestionResults);
}
scoringPolicy->getMostProbableString(traverseSession, languageWeight, outSuggestionResults);
}
/* static */ void SuggestionsOutputUtils::outputSuggestionsOfDicNode(
const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
const DicNode *const terminalDicNode, const float languageWeight,
const bool boostExactMatches, const bool forceCommitMultiWords,
const bool outputSecondWordFirstLetterInputIndex,
SuggestionResults *const outSuggestionResults) {
if (DEBUG_GEO_FULL) {
terminalDicNode->dump("OUT:");
}
const float doubleLetterCost =
scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
+ doubleLetterCost;
const bool isPossiblyOffensiveWord =
traverseSession->getDictionaryStructurePolicy()->getProbability(
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and")
const bool isSafeExactMatch = isExactMatch
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
// Increase output score of top typing suggestion to ensure autocorrection.
// TODO: Better integration with java side autocorrection logic.
const int finalScore = scoringPolicy->calculateFinalScore(
compoundDistance, traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
(forceCommitMultiWords && terminalDicNode->hasMultipleWords())
|| (isValidWord && scoringPolicy->doesAutoCorrectValidWord()),
boostExactMatches);
// Don't output invalid words. However, we still need to submit their shortcuts if any.
if (isValidWord) {
int codePoints[MAX_WORD_LENGTH];
terminalDicNode->outputResult(codePoints);
const int indexToPartialCommit = outputSecondWordFirstLetterInputIndex ?
terminalDicNode->getSecondWordFirstInputIndex(
traverseSession->getProximityInfoState(0)) :
NOT_AN_INDEX;
outSuggestionResults->addSuggestion(codePoints,
terminalDicNode->getTotalNodeCodePointCount(),
finalScore, Dictionary::KIND_CORRECTION | outputTypeFlags,
indexToPartialCommit, computeFirstWordConfidence(terminalDicNode));
}
// Output shortcuts.
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
if (!terminalDicNode->hasMultipleWords()) {
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
scoringPolicy->calculateFinalScore(compoundDistance,
traverseSession->getInputSize(),
terminalDicNode->getContainedErrorTypes(),
true /* forceCommit */, boostExactMatches) : finalScore;
outputShortcuts(&shortcutIt, shortcutBaseScore, sameAsTyped, outSuggestionResults);
}
}
/* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
const DicNode *const terminalDicNode) {
// Get the number of spaces in the first suggestion

View file

@ -41,11 +41,15 @@ class SuggestionsOutputUtils {
// Inputs longer than this will autocorrect if the suggestion is multi-word
static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
static void outputSuggestionsOfDicNode(const Scoring *const scoringPolicy,
DicTraverseSession *traverseSession, const DicNode *const terminalDicNode,
const float languageWeight, const bool boostExactMatches,
const bool forceCommitMultiWords, const bool outputSecondWordFirstLetterInputIndex,
SuggestionResults *const outSuggestionResults);
static void outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
const int finalScore, const bool sameAsTyped,
SuggestionResults *const outSuggestionResults);
static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
};
} // namespace latinime
#endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS