Separate bigram prediction from suggestion process

Change-Id: Ibdef206fbd6688a45cebbd7fb8a9f6cd175d03b0
2012-03-09 18:01:31 +09:00 · 2012-03-09 18:01:31 +09:00 · de165aed2a
parent 6f722c8301
commit de165aed2a
2 changed files with 152 additions and 3 deletions
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@ -2025,7 +2025,6 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar
                separatorCode);
    }

-    private static final WordComposer sEmptyWordComposer = new WordComposer();
    public void updateBigramPredictions() {
        if (mSuggest == null || !isSuggestionsRequested())
            return;
@ -2037,8 +2036,8 @@ public class LatinIME extends InputMethodServiceCompatWrapper implements Keyboar

        final CharSequence prevWord = EditingUtils.getThisWord(getCurrentInputConnection(),
                mSettingsValues.mWordSeparators);
-        SuggestedWords.Builder builder = mSuggest.getSuggestedWordBuilder(sEmptyWordComposer,
-                prevWord, mKeyboardSwitcher.getKeyboard().getProximityInfo(), mCorrectionMode);
+        SuggestedWords.Builder builder = mSuggest.getBigramPredictionWordBuilder(prevWord,
+                mKeyboardSwitcher.getKeyboard().getProximityInfo(), mCorrectionMode);

        if (builder.size() > 0) {
            // Explicitly supply an empty typed word (the no-second-arg version of
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@ -260,6 +260,156 @@ public class Suggest implements Dictionary.WordCallback {
        mSuggestions.add(sb);
    }

+    private static final WordComposer sEmptyWordComposer = new WordComposer();
+    public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram,
+            final ProximityInfo proximityInfo, final int correctionMode) {
+        final WordComposer wordComposer = sEmptyWordComposer; // no typed input on this path
+
+        LatinImeLogger.onStartSuggestion(prevWordForBigram);
+        mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
+        mIsAllUpperCase = wordComposer.isAllUpperCase();
+        mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
+        collectGarbage(mSuggestions, mPrefMaxSuggestions);
+        Arrays.fill(mScores, 0);
+
+        final String typedWord = wordComposer.getTypedWord();
+        final String consideredWord = mTrailingSingleQuotesCount > 0
+                ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount)
+                : typedWord;
+        // For logging, the user-typed word is currently reported as a UNIGRAM suggestion.
+        LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED,
+                Dictionary.UNIGRAM);
+        mConsideredWord = consideredWord;
+
+        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
+        // and yet be auto-corrected away from, e.g. when the whitelist only capitalizes it.
+        // The whitelist should be case-insensitive, so a boolean flag cannot express this
+        // consistently. Right now this is handled with a slight hack in
+        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
+        final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected(
+                getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized());
+
+        if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) {
+            // At most one character typed: search only the bigram dictionaries
+            Arrays.fill(mBigramScores, 0);
+            collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS);
+
+            if (!TextUtils.isEmpty(prevWordForBigram)) {
+                CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
+                if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
+                    prevWordForBigram = lowerPrevWord;
+                }
+                for (final Dictionary dictionary : mBigramDictionaries.values()) {
+                    dictionary.getBigrams(wordComposer, prevWordForBigram, this);
+                }
+                if (TextUtils.isEmpty(consideredWord)) {
+                    // Nothing entered: add up to mPrefMaxSuggestions bigrams of the previous word
+                    int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
+                    for (int i = 0; i < insertCount; ++i) {
+                        addBigramToSuggestions(mBigramSuggestions.get(i));
+                    }
+                } else {
+                    // Word entered: keep bigrams starting with its first char (or its uppercase)
+                    final char currentChar = consideredWord.charAt(0);
+                    // TODO: Must pay attention to locale when changing case.
+                    final char currentCharUpper = Character.toUpperCase(currentChar);
+                    int count = 0;
+                    final int bigramSuggestionSize = mBigramSuggestions.size();
+                    for (int i = 0; i < bigramSuggestionSize; i++) {
+                        final CharSequence bigramSuggestion = mBigramSuggestions.get(i);
+                        final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0);
+                        if (bigramSuggestionFirstChar == currentChar
+                                || bigramSuggestionFirstChar == currentCharUpper) {
+                            addBigramToSuggestions(bigramSuggestion);
+                            if (++count > mPrefMaxSuggestions) break;
+                        }
+                    }
+                }
+            }
+
+        } else if (wordComposer.size() > 1) {
+            // Two or more characters typed: search the unigrams (scores being affected by bigrams)
+            for (final String key : mUnigramDictionaries.keySet()) {
+                // Do not look up the user unigram and whitelist dictionaries here
+                if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST))
+                    continue;
+                final Dictionary dictionary = mUnigramDictionaries.get(key);
+                if (mTrailingSingleQuotesCount > 0) {
+                    final WordComposer tmpWordComposer = new WordComposer(wordComposer);
+                    for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
+                        tmpWordComposer.deleteLast();
+                    }
+                    dictionary.getWords(tmpWordComposer, this, proximityInfo);
+                } else {
+                    dictionary.getWords(wordComposer, this, proximityInfo);
+                }
+            }
+        }
+        final String consideredWordString = consideredWord.toString();
+
+        CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
+                mWhiteListDictionary.getWhitelistedWord(consideredWordString));
+
+        final boolean hasAutoCorrection;
+        if (CORRECTION_FULL == correctionMode
+                || CORRECTION_FULL_BIGRAM == correctionMode) {
+            final CharSequence autoCorrection =
+                    AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer,
+                            mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold,
+                            whitelistedWord);
+            hasAutoCorrection = (null != autoCorrection);
+        } else {
+            hasAutoCorrection = false;
+        }
+
+        if (whitelistedWord != null) {
+            if (mTrailingSingleQuotesCount > 0) {
+                final StringBuilder sb = new StringBuilder(whitelistedWord);
+                for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
+                    sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
+                }
+                mSuggestions.add(0, sb.toString());
+            } else {
+                mSuggestions.add(0, whitelistedWord);
+            }
+        }
+
+        mSuggestions.add(0, typedWord.toString()); // typed word is inserted at the front
+        StringUtils.removeDupes(mSuggestions);
+
+        if (DBG) {
+            final CharSequence autoCorrectionSuggestion = mSuggestions.get(0);
+            final int autoCorrectionSuggestionScore = mScores[0];
+            double normalizedScore = BinaryDictionary.calcNormalizedScore(
+                    typedWord.toString(), autoCorrectionSuggestion.toString(),
+                    autoCorrectionSuggestionScore);
+            ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList =
+                    new ArrayList<SuggestedWords.SuggestedWordInfo>();
+            scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
+            for (int i = 0; i < mScores.length; ++i) {
+                if (normalizedScore > 0) {
+                    final String scoreThreshold = String.format("%d (%4.2f)", mScores[i],
+                            normalizedScore);
+                    scoreInfoList.add(
+                            new SuggestedWords.SuggestedWordInfo(scoreThreshold, false));
+                    normalizedScore = 0.0; // print the normalized score at most once
+                } else {
+                    final String score = Integer.toString(mScores[i]);
+                    scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(score, false));
+                }
+            }
+            for (int i = mScores.length; i < mSuggestions.size(); ++i) {
+                scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
+            }
+            return new SuggestedWords.Builder().addWords(mSuggestions, scoreInfoList)
+                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
+                    .setHasAutoCorrection(hasAutoCorrection);
+        }
+        return new SuggestedWords.Builder().addWords(mSuggestions, null)
+                .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
+                .setHasAutoCorrection(hasAutoCorrection);
+    }
+
    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
    public SuggestedWords.Builder getSuggestedWordBuilder(
            final WordComposer wordComposer, CharSequence prevWordForBigram,