Move methods for outputting from Suggest.

Bug: 8844931 Change-Id: I83dde6c37b75ed1e1ae4e0972e169d30ada8faf5
2013-12-18 16:47:23 +09:00 · 2013-12-18 16:47:23 +09:00 · d8f35f7b4c
commit d8f35f7b4c
parent 9ee9095528
6 changed files with 317 additions and 282 deletions
--- a/native/jni/NativeFileList.mk
+++ b/native/jni/NativeFileList.mk
@ -31,6 +31,7 @@ LATIN_IME_CORE_SRC_FILES := \
        digraph_utils.cpp \
        error_type_utils.cpp \
        multi_bigram_map.cpp \
        suggestions_output_utils.cpp \
        unigram_property.cpp) \
    $(addprefix suggest/core/layout/, \
        additional_proximity_chars.cpp \
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@ -1,64 +0,0 @@
 /*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef LATINIME_SHORTCUT_UTILS
 #define LATINIME_SHORTCUT_UTILS
 #include "defines.h"
 #include "suggest/core/dicnode/dic_node_utils.h"
 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
 namespace latinime {
 class ShortcutUtils {
 public:
    static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
            int outputWordIndex, const int finalScore, int *const outputCodePoints,
            int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
        int shortcutTarget[MAX_WORD_LENGTH];
        while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
            bool isWhilelist;
            int shortcutTargetStringLength;
            shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
                    &shortcutTargetStringLength, &isWhilelist);
            int shortcutScore;
            int kind;
            if (isWhilelist && sameAsTyped) {
                shortcutScore = S_INT_MAX;
                kind = Dictionary::KIND_WHITELIST;
            } else {
                // shortcut entry's score == its base entry's score - 1
                shortcutScore = finalScore;
                // Protection against int underflow
                shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
                kind = Dictionary::KIND_SHORTCUT;
            }
            outputTypes[outputWordIndex] = kind;
            frequencies[outputWordIndex] = shortcutScore;
            frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1;
            const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH;
            DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength,
                    &outputCodePoints[startIndex2]);
            ++outputWordIndex;
        }
        return outputWordIndex;
    }
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutUtils);
 };
 } // namespace latinime
 #endif // LATINIME_SHORTCUT_UTILS
--- a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.cpp
@ -0,0 +1,261 @@
 /*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "suggest/core/dictionary/suggestions_output_utils.h"
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_utils.h"
 #include "suggest/core/dictionary/dictionary.h"
 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
 #include "suggest/core/policy/scoring.h"
 #include "suggest/core/session/dic_traverse_session.h"
 namespace latinime {
 const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
 // TODO: Split this method.
 /* static */ int SuggestionsOutputUtils::outputSuggestions(
        const Scoring *const scoringPolicy, DicTraverseSession *traverseSession,
        int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit,
        int *outputTypes, int *outputAutoCommitFirstWordConfidence) {
 #if DEBUG_EVALUATE_MOST_PROBABLE_STRING
    const int terminalSize = 0;
 #else
    const int terminalSize = min(MAX_RESULTS,
            static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
 #endif
    DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
    for (int index = terminalSize - 1; index >= 0; --index) {
        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
    }
    const float languageWeight = scoringPolicy->getAdjustedLanguageWeight(
            traverseSession, terminals, terminalSize);
    int outputWordIndex = 0;
    // Insert most probable word at index == 0 as long as there is one terminal at least
    const bool hasMostProbableString =
            scoringPolicy->getMostProbableString(traverseSession, terminalSize, languageWeight,
                    &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
    if (hasMostProbableString) {
        outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
        ++outputWordIndex;
    }
    // Initial value of the loop index for terminal nodes (words)
    int doubleLetterTerminalIndex = -1;
    DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
    scoringPolicy->searchWordWithDoubleLetter(terminals, terminalSize,
            &doubleLetterTerminalIndex, &doubleLetterLevel);
    int maxScore = S_INT_MIN;
    // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
    // a long multiple words suggestion.
    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
    // traverseSession->isPartiallyCommited() always returns false because we never auto partial
    // commit for now.
    const bool forceCommitMultiWords = (terminalSize > 0) ?
            scoringPolicy->autoCorrectsToMultiWordSuggestionIfTop()
                    && (traverseSession->isPartiallyCommited()
                            || (traverseSession->getInputSize()
                                    >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
                                            && terminals[0].hasMultipleWords())) : false;
    // TODO: have partial commit work even with multiple pointers.
    const bool outputSecondWordFirstLetterInputIndex =
            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
    if (terminalSize > 0) {
        // If we have no suggestions, don't write this
        outputAutoCommitFirstWordConfidence[0] =
                computeFirstWordConfidence(&terminals[0]);
    }
    // Output suggestion results here
    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
            ++terminalIndex) {
        DicNode *terminalDicNode = &terminals[terminalIndex];
        if (DEBUG_GEO_FULL) {
            terminalDicNode->dump("OUT:");
        }
        const float doubleLetterCost = scoringPolicy->getDoubleLetterDemotionDistanceCost(
                terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
        const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
                + doubleLetterCost;
        const bool isPossiblyOffensiveWord =
                traverseSession->getDictionaryStructurePolicy()->getProbability(
                        terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
        const bool isExactMatch = terminalDicNode->isExactMatch();
        const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
        // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
        // (e.g. "AMD" and "and")
        const bool isSafeExactMatch = isExactMatch
                && !(isPossiblyOffensiveWord && isFirstCharUppercase);
        const int outputTypeFlags =
                (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
                | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
        // Entries that are blacklisted or do not represent a word should not be output.
        const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
        // Increase output score of top typing suggestion to ensure autocorrection.
        // TODO: Better integration with java side autocorrection logic.
        const int finalScore = scoringPolicy->calculateFinalScore(
                compoundDistance, traverseSession->getInputSize(),
                terminalDicNode->isExactMatch()
                        || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
                                || (isValidWord && scoringPolicy->doesAutoCorrectValidWord()));
        if (maxScore < finalScore && isValidWord) {
            maxScore = finalScore;
        }
        // Don't output invalid words. However, we still need to submit their shortcuts if any.
        if (isValidWord) {
            outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
            frequencies[outputWordIndex] = finalScore;
            if (outputSecondWordFirstLetterInputIndex) {
                outputIndicesToPartialCommit[outputWordIndex] =
                        terminalDicNode->getSecondWordFirstInputIndex(
                                traverseSession->getProximityInfoState(0));
            } else {
                outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
            }
            // Populate the outputChars array with the suggested word.
            const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
            terminalDicNode->outputResult(&outputCodePoints[startIndex]);
            ++outputWordIndex;
        }
        if (!terminalDicNode->hasMultipleWords()) {
            BinaryDictionaryShortcutIterator shortcutIt(
                    traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
                    traverseSession->getDictionaryStructurePolicy()
                            ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
            // Shortcut is not supported for multiple words suggestions.
            // TODO: Check shortcuts during traversal for multiple words suggestions.
            const bool sameAsTyped = scoringPolicy->sameAsTyped(traverseSession, terminalDicNode);
            const int shortcutBaseScore = scoringPolicy->doesAutoCorrectValidWord() ?
                     scoringPolicy->calculateFinalScore(compoundDistance,
                             traverseSession->getInputSize(), true /* forceCommit */) : finalScore;
            const int updatedOutputWordIndex = outputShortcuts(&shortcutIt,
                    outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
                    sameAsTyped);
            const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
                    traverseSession->getProximityInfoState(0));
            for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
                if (outputSecondWordFirstLetterInputIndex) {
                    outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
                } else {
                    outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
                }
            }
            outputWordIndex = updatedOutputWordIndex;
        }
        DicNode::managedDelete(terminalDicNode);
    }
    if (hasMostProbableString) {
        scoringPolicy->safetyNetForMostProbableString(terminalSize, maxScore,
                &outputCodePoints[0], &frequencies[0]);
    }
    return outputWordIndex;
 }
 /* static */ int SuggestionsOutputUtils::computeFirstWordConfidence(
        const DicNode *const terminalDicNode) {
    // Get the number of spaces in the first suggestion
    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    // Get the number of characters in the first suggestion
    const int length = terminalDicNode->getTotalNodeCodePointCount();
    // Get the distance for the first word of the suggestion
    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    // we are very confident.
    // Expected space count is 1 ~ 5
    static const int MIN_EXPECTED_SPACE_COUNT = 1;
    static const int MAX_EXPECTED_SPACE_COUNT = 5;
    // Expected length is about 4 ~ 30
    static const int MIN_EXPECTED_LENGTH = 4;
    static const int MAX_EXPECTED_LENGTH = 30;
    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    static const float MIN_EXPECTED_DISTANCE = 0.0;
    static const float MAX_EXPECTED_DISTANCE = 2.0;
    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    // outside these values. We want to output a value that reflects all of these. Each factor
    // contributes a bit.
    // We need at least a space.
    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
    // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
    // weight of the distance. Clamp to avoid overflows.
    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
            * (MAX_EXPECTED_DISTANCE - clampedDistance)
            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    // weight of the space count.
    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
    return distanceContribution + lengthContribution + spaceContribution;
 }
 /* static */ int SuggestionsOutputUtils::outputShortcuts(
        BinaryDictionaryShortcutIterator *const shortcutIt,
        int outputWordIndex, const int finalScore, int *const outputCodePoints,
        int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
    int shortcutTarget[MAX_WORD_LENGTH];
    while (shortcutIt->hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
        bool isWhilelist;
        int shortcutTargetStringLength;
        shortcutIt->nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
                &shortcutTargetStringLength, &isWhilelist);
        int shortcutScore;
        int kind;
        if (isWhilelist && sameAsTyped) {
            shortcutScore = S_INT_MAX;
            kind = Dictionary::KIND_WHITELIST;
        } else {
            // shortcut entry's score == its base entry's score - 1
            shortcutScore = finalScore;
            // Protection against int underflow
            shortcutScore = max(S_INT_MIN + 1, shortcutScore) - 1;
            kind = Dictionary::KIND_SHORTCUT;
        }
        outputTypes[outputWordIndex] = kind;
        frequencies[outputWordIndex] = shortcutScore;
        frequencies[outputWordIndex] = max(S_INT_MIN + 1, shortcutScore) - 1;
        const int startIndex2 = outputWordIndex * MAX_WORD_LENGTH;
        DicNodeUtils::appendTwoWords(0, 0, shortcutTarget, shortcutTargetStringLength,
                &outputCodePoints[startIndex2]);
        ++outputWordIndex;
    }
    return outputWordIndex;
 }
 } // namespace latinime
--- a/native/jni/src/suggest/core/dictionary/suggestions_output_utils.h
+++ b/native/jni/src/suggest/core/dictionary/suggestions_output_utils.h
@ -0,0 +1,52 @@
 /*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef LATINIME_SUGGESTIONS_OUTPUT_UTILS
 #define LATINIME_SUGGESTIONS_OUTPUT_UTILS
 #include "defines.h"
 namespace latinime {
 class BinaryDictionaryShortcutIterator;
 class DicNode;
 class DicTraverseSession;
 class Scoring;
 class SuggestionsOutputUtils {
 public:
    /**
     * Outputs the final list of suggestions (i.e., terminal nodes).
     */
    static int outputSuggestions(const Scoring *const scoringPolicy,
            DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints,
            int *outputIndicesToPartialCommit, int *outputTypes,
            int *outputAutoCommitFirstWordConfidence);
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(SuggestionsOutputUtils);
    // Inputs longer than this will autocorrect if the suggestion is multi-word
    static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
    static int computeFirstWordConfidence(const DicNode *const terminalDicNode);
    static int outputShortcuts(BinaryDictionaryShortcutIterator *const shortcutIt,
            int outputWordIndex, const int finalScore, int *const outputCodePoints,
            int *const frequencies, int *const outputTypes, const bool sameAsTyped);
 };
 } // namespace latinime
 #endif // LATINIME_SUGGESTIONS_OUTPUT_UTILS
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@ -19,13 +19,11 @@
 #include "suggest/core/dicnode/dic_node.h"
 #include "suggest/core/dicnode/dic_node_priority_queue.h"
 #include "suggest/core/dicnode/dic_node_vector.h"
 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
 #include "suggest/core/dictionary/dictionary.h"
 #include "suggest/core/dictionary/digraph_utils.h"
-#include "suggest/core/dictionary/shortcut_utils.h"
+#include "suggest/core/dictionary/suggestions_output_utils.h"
 #include "suggest/core/layout/proximity_info.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
 #include "suggest/core/policy/scoring.h"
 #include "suggest/core/policy/traversal.h"
 #include "suggest/core/policy/weighting.h"
 #include "suggest/core/session/dic_traverse_session.h"
@ -33,9 +31,7 @@
 namespace latinime {
 // Initialization of class constants.
 const int Suggest::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
 const float Suggest::AUTOCORRECT_CLASSIFICATION_THRESHOLD = 0.33f;
 /**
 * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates
@ -70,8 +66,8 @@ int Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
    }
    PROF_END(1);
    PROF_START(2);
-    const int size = outputSuggestions(tSession, frequencies, outWords, outputIndices, outputTypes,
+    const int size = SuggestionsOutputUtils::outputSuggestions(SCORING, tSession, frequencies,
-            outputAutoCommitFirstWordConfidence);
+            outWords, outputIndices, outputTypes, outputAutoCommitFirstWordConfidence);
    PROF_END(2);
    PROF_CLOSE;
    return size;
@ -114,205 +110,6 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession, int commitPo
    }
 }
 /**
 * Outputs the final list of suggestions (i.e., terminal nodes).
 */
 int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
        int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
        int *outputAutoCommitFirstWordConfidence) const {
 #if DEBUG_EVALUATE_MOST_PROBABLE_STRING
    const int terminalSize = 0;
 #else
    const int terminalSize = min(MAX_RESULTS,
            static_cast<int>(traverseSession->getDicTraverseCache()->terminalSize()));
 #endif
    DicNode terminals[MAX_RESULTS]; // Avoiding non-POD variable length array
    for (int index = terminalSize - 1; index >= 0; --index) {
        traverseSession->getDicTraverseCache()->popTerminal(&terminals[index]);
    }
    const float languageWeight = SCORING->getAdjustedLanguageWeight(
            traverseSession, terminals, terminalSize);
    int outputWordIndex = 0;
    // Insert most probable word at index == 0 as long as there is one terminal at least
    const bool hasMostProbableString =
            SCORING->getMostProbableString(traverseSession, terminalSize, languageWeight,
                    &outputCodePoints[0], &outputTypes[0], &frequencies[0]);
    if (hasMostProbableString) {
        outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
        ++outputWordIndex;
    }
    // Initial value of the loop index for terminal nodes (words)
    int doubleLetterTerminalIndex = -1;
    DoubleLetterLevel doubleLetterLevel = NOT_A_DOUBLE_LETTER;
    SCORING->searchWordWithDoubleLetter(terminals, terminalSize,
            &doubleLetterTerminalIndex, &doubleLetterLevel);
    int maxScore = S_INT_MIN;
    // Force autocorrection for obvious long multi-word suggestions when the top suggestion is
    // a long multiple words suggestion.
    // TODO: Implement a smarter auto-commit method for handling multi-word suggestions.
    // traverseSession->isPartiallyCommited() always returns false because we never auto partial
    // commit for now.
    const bool forceCommitMultiWords = (terminalSize > 0) ?
            SCORING->autoCorrectsToMultiWordSuggestionIfTop()
                    && (traverseSession->isPartiallyCommited()
                            || (traverseSession->getInputSize()
                                    >= MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT
                                            && terminals[0].hasMultipleWords())) : false;
    // TODO: have partial commit work even with multiple pointers.
    const bool outputSecondWordFirstLetterInputIndex =
            traverseSession->isOnlyOnePointerUsed(0 /* pointerId */);
    if (terminalSize > 0) {
        // If we have no suggestions, don't write this
        outputAutoCommitFirstWordConfidence[0] =
                computeFirstWordConfidence(&terminals[0]);
    }
    // Output suggestion results here
    for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS;
            ++terminalIndex) {
        DicNode *terminalDicNode = &terminals[terminalIndex];
        if (DEBUG_GEO_FULL) {
            terminalDicNode->dump("OUT:");
        }
        const float doubleLetterCost = SCORING->getDoubleLetterDemotionDistanceCost(
                terminalIndex, doubleLetterTerminalIndex, doubleLetterLevel);
        const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
                + doubleLetterCost;
        const bool isPossiblyOffensiveWord =
                traverseSession->getDictionaryStructurePolicy()->getProbability(
                        terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
        const bool isExactMatch = terminalDicNode->isExactMatch();
        const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
        // Heuristic: We exclude freq=0 first-char-uppercase words from exact match.
        // (e.g. "AMD" and "and")
        const bool isSafeExactMatch = isExactMatch
                && !(isPossiblyOffensiveWord && isFirstCharUppercase);
        const int outputTypeFlags =
                (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
                | (isSafeExactMatch ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
        // Entries that are blacklisted or do not represent a word should not be output.
        const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
        // Increase output score of top typing suggestion to ensure autocorrection.
        // TODO: Better integration with java side autocorrection logic.
        const int finalScore = SCORING->calculateFinalScore(
                compoundDistance, traverseSession->getInputSize(),
                terminalDicNode->isExactMatch()
                        || (forceCommitMultiWords && terminalDicNode->hasMultipleWords())
                                || (isValidWord && SCORING->doesAutoCorrectValidWord()));
        if (maxScore < finalScore && isValidWord) {
            maxScore = finalScore;
        }
        // Don't output invalid words. However, we still need to submit their shortcuts if any.
        if (isValidWord) {
            outputTypes[outputWordIndex] = Dictionary::KIND_CORRECTION | outputTypeFlags;
            frequencies[outputWordIndex] = finalScore;
            if (outputSecondWordFirstLetterInputIndex) {
                outputIndicesToPartialCommit[outputWordIndex] =
                        terminalDicNode->getSecondWordFirstInputIndex(
                                traverseSession->getProximityInfoState(0));
            } else {
                outputIndicesToPartialCommit[outputWordIndex] = NOT_AN_INDEX;
            }
            // Populate the outputChars array with the suggested word.
            const int startIndex = outputWordIndex * MAX_WORD_LENGTH;
            terminalDicNode->outputResult(&outputCodePoints[startIndex]);
            ++outputWordIndex;
        }
        if (!terminalDicNode->hasMultipleWords()) {
            BinaryDictionaryShortcutIterator shortcutIt(
                    traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
                    traverseSession->getDictionaryStructurePolicy()
                            ->getShortcutPositionOfPtNode(terminalDicNode->getPtNodePos()));
            // Shortcut is not supported for multiple words suggestions.
            // TODO: Check shortcuts during traversal for multiple words suggestions.
            const bool sameAsTyped = SCORING->sameAsTyped(traverseSession, terminalDicNode);
            const int shortcutBaseScore = SCORING->doesAutoCorrectValidWord() ?
                     SCORING->calculateFinalScore(compoundDistance, traverseSession->getInputSize(),
                             true /* forceCommit */) : finalScore;
            const int updatedOutputWordIndex = ShortcutUtils::outputShortcuts(&shortcutIt,
                    outputWordIndex, shortcutBaseScore, outputCodePoints, frequencies, outputTypes,
                    sameAsTyped);
            const int secondWordFirstInputIndex = terminalDicNode->getSecondWordFirstInputIndex(
                    traverseSession->getProximityInfoState(0));
            for (int i = outputWordIndex; i < updatedOutputWordIndex; ++i) {
                if (outputSecondWordFirstLetterInputIndex) {
                    outputIndicesToPartialCommit[i] = secondWordFirstInputIndex;
                } else {
                    outputIndicesToPartialCommit[i] = NOT_AN_INDEX;
                }
            }
            outputWordIndex = updatedOutputWordIndex;
        }
        DicNode::managedDelete(terminalDicNode);
    }
    if (hasMostProbableString) {
        SCORING->safetyNetForMostProbableString(terminalSize, maxScore,
                &outputCodePoints[0], &frequencies[0]);
    }
    return outputWordIndex;
 }
 int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const {
    // Get the number of spaces in the first suggestion
    const int spaceCount = terminalDicNode->getTotalNodeSpaceCount();
    // Get the number of characters in the first suggestion
    const int length = terminalDicNode->getTotalNodeCodePointCount();
    // Get the distance for the first word of the suggestion
    const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord();
    // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000.
    // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or
    // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means
    // we are very confident.
    // Expected space count is 1 ~ 5
    static const int MIN_EXPECTED_SPACE_COUNT = 1;
    static const int MAX_EXPECTED_SPACE_COUNT = 5;
    // Expected length is about 4 ~ 30
    static const int MIN_EXPECTED_LENGTH = 4;
    static const int MAX_EXPECTED_LENGTH = 30;
    // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0
    static const float MIN_EXPECTED_DISTANCE = 0.0;
    static const float MAX_EXPECTED_DISTANCE = 2.0;
    // This is not strict: it's where most stuff will be falling, but it's still fine if it's
    // outside these values. We want to output a value that reflects all of these. Each factor
    // contributes a bit.
    // We need at least a space.
    if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE;
    // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means 0
    // contribution, while MAX_EXPECTED_DISTANCE means full contribution according to the
    // weight of the distance. Clamp to avoid overflows.
    const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE
            : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance;
    const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT
            * (MAX_EXPECTED_DISTANCE - clampedDistance)
            / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE);
    // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no
    // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the
    // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp.
    const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT
            * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH);
    // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no
    // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the
    // weight of the space count.
    const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT
            * (spaceCount - MIN_EXPECTED_SPACE_COUNT)
            / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT);
    return distanceContribution + lengthContribution + spaceContribution;
 }
 /**
 * Expands the dicNodes in the current search priority queue by advancing to the possible child
 * nodes based on the next touch point(s) (or no touch points for lookahead)
--- a/native/jni/src/suggest/core/suggest.h
+++ b/native/jni/src/suggest/core/suggest.h
@ -55,18 +55,11 @@ class Suggest : public SuggestInterface {
    DISALLOW_IMPLICIT_CONSTRUCTORS(Suggest);
    void createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
            const bool spaceSubstitution) const;
    int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies,
            int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes,
            int *outputAutoCommitFirstWordConfidence) const;
    int computeFirstWordConfidence(const DicNode *const terminalDicNode) const;
    void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const;
    void expandCurrentDicNodes(DicTraverseSession *traverseSession) const;
    void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    void processExpandedDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    void weightChildNode(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    float getAutocorrectScore(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    void generateFeatures(
            DicTraverseSession *traverseSession, DicNode *dicNode, float *features) const;
    void processDicNodeAsOmission(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    void processDicNodeAsDigraph(DicTraverseSession *traverseSession, DicNode *dicNode) const;
    void processDicNodeAsTransposition(DicTraverseSession *traverseSession,
@ -79,13 +72,8 @@ class Suggest : public SuggestInterface {
    void processDicNodeAsMatch(DicTraverseSession *traverseSession,
            DicNode *childDicNode) const;
    // Inputs longer than this will autocorrect if the suggestion is multi-word
    static const int MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT;
    static const int MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE;
    // Threshold for autocorrection classifier
    static const float AUTOCORRECT_CLASSIFICATION_THRESHOLD;
    const Traversal *const TRAVERSAL;
    const Scoring *const SCORING;
    const Weighting *const WEIGHTING;