LatinIME/java/src/com/android/inputmethod/latin/Suggest.java

/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION;
import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;

import javax.annotation.Nonnull;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session id for
    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
    // We are sharing the same ID between typing and gesture to save RAM footprint.
    public static final int SESSION_ID_TYPING = 0;
    public static final int SESSION_ID_GESTURE = 0;

    // Close to -2**31
    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;

    private static final boolean DBG = DebugFlags.DEBUG_ENABLED;
    private final DictionaryFacilitator mDictionaryFacilitator;

    private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;
    private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength =
            new HashMap<>();
    static {
        // TODO: should we add Finnish here?
        // TODO: This should not be hardcoded here but be written in the dictionary header
        sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(),
                MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
    }

    private float mAutoCorrectionThreshold;
    private float mPlausibilityThreshold;

    public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
        mDictionaryFacilitator = dictionaryFacilitator;
    }

    /**
     * Set the normalized-score threshold for a suggestion to be considered strong enough that we
     * will auto-correct to this.
     * @param threshold the threshold
     */
    public void setAutoCorrectionThreshold(final float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Set the normalized-score threshold for what we consider a "plausible" suggestion, in
     * the same dimension as the auto-correction threshold.
     * @param threshold the threshold
     */
    public void setPlausibilityThreshold(final float threshold) {
        mPlausibilityThreshold = threshold;
    }

    public interface OnGetSuggestedWordsCallback {
        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
    }

    public void getSuggestedWords(final WordComposer wordComposer,
            final NgramContext ngramContext, final Keyboard keyboard,
            final SettingsValuesForSuggestion settingsValuesForSuggestion,
            final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        if (wordComposer.isBatchMode()) {
            getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard,
                    settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
        } else {
            getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard,
                    settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
                    sequenceNumber, callback);
        }
    }

    private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
            final WordComposer wordComposer, final SuggestionResults results,
            final int trailingSingleQuotesCount, final Locale defaultLocale) {
        final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase()
                && !wordComposer.isResumed();
        final boolean isOnlyFirstCharCapitalized =
                wordComposer.isOrWillBeOnlyFirstCharCapitalized();

        final ArrayList<SuggestedWordInfo> suggestionsContainer = new ArrayList<>(results);
        final int suggestionsCount = suggestionsContainer.size();
        if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase
                || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final Locale wordLocale = wordInfo.mSourceDict.mLocale;
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, null == wordLocale ? defaultLocale : wordLocale,
                        shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }
        return suggestionsContainer;
    }

    private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
            @Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
        if (suggestions.isEmpty()) {
            return null;
        }
        final SuggestedWordInfo firstSuggestedWordInfo = suggestions.get(0);
        if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
            return null;
        }
        return firstSuggestedWordInfo;
    }

    // Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
    // and calls the callback function with the suggestions.
    private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
            final NgramContext ngramContext, final Keyboard keyboard,
            final SettingsValuesForSuggestion settingsValuesForSuggestion,
            final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
            final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
        final String typedWordString = wordComposer.getTypedWord();
        final int trailingSingleQuotesCount =
                StringUtils.getTrailingSingleQuotesCount(typedWordString);
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
                : typedWordString;

        final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
                wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
                settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction);
        final Locale locale = mDictionaryFacilitator.getLocale();
        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
                        trailingSingleQuotesCount, locale);

        boolean foundInDictionary = false;
        Dictionary sourceDictionaryOfRemovedWord = null;
        for (final SuggestedWordInfo info : suggestionsContainer) {
            // Search for the best dictionary, defined as the first one with the highest match
            // quality we can find.
            if (!foundInDictionary && typedWordString.equals(info.mWord)) {
                // Use this source if the old match had lower quality than this match
                sourceDictionaryOfRemovedWord = info.mSourceDict;
                foundInDictionary = true;
                break;
            }
        }

        final int firstOcurrenceOfTypedWordInSuggestions =
                SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer);

        final SuggestedWordInfo whitelistedWordInfo =
                getWhitelistedWordInfoOrNull(suggestionsContainer);
        final String whitelistedWord = whitelistedWordInfo == null
                ? null : whitelistedWordInfo.mWord;
        final boolean resultsArePredictions = !wordComposer.isComposingWord();

        // We allow auto-correction if whitelisting is not required or the word is whitelisted,
        // or if the word had more than one char and was not suggested.
        final boolean allowsToBeAutoCorrected =
                (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
                || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null));

        final boolean hasAutoCorrection;
        // If correction is not enabled, we never auto-correct. This is for example for when
        // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
        if (!isCorrectionEnabled
                // If the word does not allow to be auto-corrected, then we don't auto-correct.
                || !allowsToBeAutoCorrected
                // If we are doing prediction, then we never auto-correct of course
                || resultsArePredictions
                // If we don't have suggestion results, we can't evaluate the first suggestion
                // for auto-correction
                || suggestionResults.isEmpty()
                // If the word has digits, we never auto-correct because it's likely the word
                // was type with a lot of care
                || wordComposer.hasDigits()
                // If the word is mostly caps, we never auto-correct because this is almost
                // certainly intentional (and careful input)
                || wordComposer.isMostlyCaps()
                // We never auto-correct when suggestions are resumed because it would be unexpected
                || wordComposer.isResumed()
                // If we don't have a main dictionary, we never want to auto-correct. The reason
                // for this is, the user may have a contact whose name happens to match a valid
                // word in their language, and it will unexpectedly auto-correct. For example, if
                // the user types in English with no dictionary and has a "Will" in their contact
                // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
                // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
                // TODO: now that we have personalization, we may want to re-evaluate this decision
                || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
                // If the first suggestion is a shortcut we never auto-correct to it, regardless
                // of how strong it is (allowlist entries are not KIND_SHORTCUT but KIND_WHITELIST).
                // TODO: we may want to have shortcut-only entries auto-correct in the future.
                || suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
            hasAutoCorrection = false;
        } else {
            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
            if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
                    && firstOcurrenceOfTypedWordInSuggestions != 0) {
                hasAutoCorrection = true;
            } else if (!AutoCorrectionUtils.suggestionExceedsThreshold(
                    firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
                // Score is too low for autocorrect
                hasAutoCorrection = false;
            } else {
                // We have a high score, so we need to check if this suggestion is in the correct
                // form to allow auto-correcting to it in this language. For details of how this
                // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
                // TODO: this should not have its own logic here but be handled by the dictionary.
                hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
            }
        }

        final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
                "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE,
                SuggestedWordInfo.KIND_TYPED,
                null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
                        : sourceDictionaryOfRemovedWord,
                SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
                SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
        if (!TextUtils.isEmpty(typedWordString)) {
            suggestionsContainer.add(0, typedWordInfo);
        }

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString,
                    suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        final int inputStyle;
        if (resultsArePredictions) {
            inputStyle = suggestionResults.mIsBeginningOfSentence
                    ? SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
                    : SuggestedWords.INPUT_STYLE_PREDICTION;
        } else {
            inputStyle = inputStyleIfNotPrediction;
        }

        final boolean isTypedWordValid = firstOcurrenceOfTypedWordInSuggestions > -1
                || (!resultsArePredictions && !allowsToBeAutoCorrected);
        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
                suggestionResults.mRawSuggestions, typedWordInfo,
                isTypedWordValid,
                hasAutoCorrection /* willAutoCorrect */,
                false /* isObsoleteSuggestions */, inputStyle, sequenceNumber));
    }

    // Retrieves suggestions for the batch input
    // and calls the callback function with the suggestions.
    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
            final NgramContext ngramContext, final Keyboard keyboard,
            final SettingsValuesForSuggestion settingsValuesForSuggestion,
            final int inputStyle, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
                wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
                settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle);
        // For transforming words that don't come from a dictionary, because it's our best bet
        final Locale locale = mDictionaryFacilitator.getLocale();
        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                new ArrayList<>(suggestionResults);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final Locale wordlocale = wordInfo.mSourceDict.mLocale;
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, null == wordlocale ? locale : wordlocale, isAllUpperCase,
                        isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
                && suggestionsContainer.size() > 1
                && TextUtils.equals(suggestionsContainer.get(0).mWord,
                   wordComposer.getRejectedBatchModeSuggestion())) {
            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
            suggestionsContainer.add(1, rejected);
        }
        SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer);

        // For some reason some suggestions with MIN_VALUE are making their way here.
        // TODO: Find a more robust way to detect distracters.
        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
                suggestionsContainer.remove(i);
            }
        }

        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        // Note that because this method is never used to get predictions, there is no need to
        // modify inputType such in getSuggestedWordsForNonBatchInput.
        final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null
                : suggestionsContainer.get(0);

        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
                suggestionResults.mRawSuggestions,
                pseudoTypedWordInfo,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isObsoleteSuggestions */,
                inputStyle, sequenceNumber));
    }

    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList = new ArrayList<>(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
        // than i because we added the typed word to mSuggestions without touching mScores.
        for (int i = 0; i < suggestionsSize - 1; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i + 1);
            final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format(
                        Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
                        cur.mSourceDict.mDictType);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    /**
     * Computes whether this suggestion should be blocked or not in this language
     *
     * This function implements a filter that avoids auto-correcting to suggestions that contain
     * spaces that are above a certain language-dependent character limit. In languages like German
     * where it's possible to concatenate many words, it often happens our dictionary does not
     * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain
     * one or several spaces. Ideally we should understand what the user wants and display useful
     * suggestions by improving the dictionary and possibly having some specific logic. Until
     * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell
     * whether a suggestion is useful or not. So at least for the time being we block
     * auto-correction when the suggestion is long and contains a space, which should avoid the
     * worst damage.
     * This function is implementing that filter. If the language enforces no such limit, then it
     * always returns true. If the suggestion contains no space, it also returns true. Otherwise,
     * it checks the length against the language-specific limit.
     *
     * @param info the suggestion info
     * @return whether it's fine to auto-correct to this.
     */
    private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
        final Locale locale = info.mSourceDict.mLocale;
        if (null == locale) {
            return true;
        }
        final Integer maximumLengthForThisLanguage =
                sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage());
        if (null == maximumLengthForThisLanguage) {
            // This language does not enforce a maximum length to auto-correction
            return true;
        }
        return info.mWord.length() <= maximumLengthForThisLanguage
                || -1 == info.mWord.indexOf(Constants.CODE_SPACE);
    }

    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toUpperCase(locale));
        } else if (isOnlyFirstCharCapitalized) {
            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        // Appending quotes is here to help people quote words. However, it's not helpful
        // when they type words with quotes toward the end like "it's" or "didn't", where
        // it's more likely the user missed the last character (or didn't type it yet).
        final int quotesToAppend = trailingSingleQuotesCount
                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
        for (int i = quotesToAppend - 1; i >= 0; --i) {
            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb.toString(), wordInfo.mPrevWordsContext,
                wordInfo.mScore, wordInfo.mKindAndFlags,
                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
                wordInfo.mAutoCommitFirstWordConfidence);
    }
}