diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index d64a1a6f7..9c9a65b9d 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -540,18 +540,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen refreshPersonalizationDictionarySession(); } - private DistracterFilter createDistracterFilter() { - final MainKeyboardView mainKeyboardView = mKeyboardSwitcher.getMainKeyboardView(); - // TODO: Create Keyboard when mainKeyboardView is null. - // TODO: Figure out the most reasonable keyboard for the filter. Refer to the - // spellchecker's logic. - final Keyboard keyboard = (mainKeyboardView != null) ? - mainKeyboardView.getKeyboard() : null; - final DistracterFilter distracterFilter = new DistracterFilter(mInputLogic.mSuggest, - keyboard); - return distracterFilter; - } - private void refreshPersonalizationDictionarySession() { final DictionaryFacilitatorForSuggest dictionaryFacilitator = mInputLogic.mSuggest.mDictionaryFacilitator; @@ -1751,6 +1739,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen mInputLogic.mSuggest.mDictionaryFacilitator.clearPersonalizationDictionary(); } + @UsedForTesting + /* package for test */ DistracterFilter createDistracterFilter() { + return DistracterFilter.createDistracterFilter(mInputLogic.mSuggest, mKeyboardSwitcher); + } + public void dumpDictionaryForDebug(final String dictName) { final DictionaryFacilitatorForSuggest dictionaryFacilitator = mInputLogic.mSuggest.mDictionaryFacilitator; diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 48e43d6ef..55cbf79b3 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -17,21 +17,35 @@ package com.android.inputmethod.latin.utils; import com.android.inputmethod.keyboard.Keyboard; +import com.android.inputmethod.keyboard.KeyboardSwitcher; +import com.android.inputmethod.keyboard.MainKeyboardView; +import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback; +import com.android.inputmethod.latin.SuggestedWords; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.WordComposer; /** - * This class is used to prevent distracters/misspellings being added to personalization + * This class is used to prevent distracters being added to personalization * or user history dictionaries */ public class DistracterFilter { private final Suggest mSuggest; private final Keyboard mKeyboard; + // If the score of the top suggestion exceeds this value, the tested word (e.g., + // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to + // words in dictionary. The greater the threshold is, the less likely the tested word would + // become a distracter, which means the tested word will be more likely to be added to + // the dictionary. + private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f; + /** * Create a DistracterFilter instance. * * @param suggest an instance of Suggest which will be used to obtain a list of suggestions - * for a potential distracter/misspelling + * for a potential distracter * @param keyboard the keyboard that is currently being used. This information is needed * when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions. */ @@ -40,9 +54,79 @@ public class DistracterFilter { mKeyboard = keyboard; } - public boolean isDistracterToWordsInDictionaries(final String prevWord, - final String targetWord) { - // TODO: to be implemented + public static DistracterFilter createDistracterFilter(final Suggest suggest, + final KeyboardSwitcher keyboardSwitcher) { + final MainKeyboardView mainKeyboardView = keyboardSwitcher.getMainKeyboardView(); + // TODO: Create Keyboard when mainKeyboardView is null. + // TODO: Figure out the most reasonable keyboard for the filter. Refer to the + // spellchecker's logic. + final Keyboard keyboard = (mainKeyboardView != null) ? + mainKeyboardView.getKeyboard() : null; + final DistracterFilter distracterFilter = new DistracterFilter(suggest, keyboard); + return distracterFilter; + } + + private static boolean suggestionExceedsDistracterThreshold( + final SuggestedWordInfo suggestion, final String consideredWord, + final float distracterThreshold) { + if (null != suggestion) { + final int suggestionScore = suggestion.mScore; + final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( + consideredWord, suggestion.mWord, suggestionScore); + if (normalizedScore > distracterThreshold) { + return true; + } + } return false; } + + /** + * Determine whether a word is a distracter to words in dictionaries. + * + * @param prevWord the previous word, or null if none. + * @param testedWord the word that will be tested to see whether it is a distracter to words + * in dictionaries. + * @return true if testedWord is a distracter, otherwise false. + */ + public boolean isDistracterToWordsInDictionaries(final String prevWord, + final String testedWord) { + if (mSuggest == null) { + return false; + } + + final WordComposer composer = new WordComposer(); + final int[] codePoints = StringUtils.toCodePointArray(testedWord); + final int[] coordinates; + if (null == mKeyboard) { + coordinates = CoordinateUtils.newCoordinateArray(codePoints.length, + Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); + } else { + coordinates = mKeyboard.getCoordinates(codePoints); + } + composer.setComposingWord(codePoints, coordinates, prevWord); + + final int trailingSingleQuotesCount = composer.trailingSingleQuotesCount(); + final String consideredWord = trailingSingleQuotesCount > 0 ? testedWord.substring(0, + testedWord.length() - trailingSingleQuotesCount) : testedWord; + final AsyncResultHolder holder = new AsyncResultHolder(); + final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() { + @Override + public void onGetSuggestedWords(final SuggestedWords suggestedWords) { + if (suggestedWords != null && suggestedWords.size() > 1) { + // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from + // the decoder is at index 1. + final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1); + final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold( + firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD); + holder.set(hasStrongDistractor); + } + } + }; + mSuggest.getSuggestedWords(composer, prevWord, mKeyboard.getProximityInfo(), + true /* blockOffensiveWords */, true /* isCorrectionEnbaled */, + null /* additionalFeaturesOptions */, 0 /* sessionId */, + SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback); + + return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index 55061f45f..74e7db901 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -129,6 +129,9 @@ public final class LanguageModelParam { if (locale == null) { return null; } + // TODO: Though targetWord is an IV (in-vocabulary) word, we should still apply + // distracterFilter in the following code. If targetWord is a distracter, + // it should be filtered out. if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, true /* isValidWord */, locale); diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java new file mode 100644 index 000000000..186542ae5 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import android.test.suitebuilder.annotation.LargeTest; + +import com.android.inputmethod.latin.utils.DistracterFilter; + +/** + * Unit test for DistracterFilter + */ +@LargeTest +public class DistracterFilterTest extends InputTestsBase { + private DistracterFilter mDistracterFilter; + + @Override + protected void setUp() throws Exception { + super.setUp(); + mDistracterFilter = mLatinIME.createDistracterFilter(); + } + + public void testIsDistractorToWordsInDictionaries() { + final String EMPTY_PREV_WORD = null; + String typedWord = "alot"; + // For this test case, we consider "alot" is a distracter to "a lot". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "mot"; + // For this test case, we consider "mot" is a distracter to "not". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "wierd"; + // For this test case, we consider "wierd" is a distracter to "weird". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "hoe"; + // For this test case, we consider "hoe" is a distracter to "how". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "nit"; + // For this test case, we consider "nit" is a distracter to "not". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "ill"; + // For this test case, we consider "ill" is a distracter to "I'll". + assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "asdfd"; + // For this test case, we consider "asdfd" is not a distracter to any word in dictionaries. + assertFalse( + mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + + typedWord = "thank"; + // For this test case, we consider "thank" is not a distracter to any other word + // in dictionaries. + assertFalse( + mDistracterFilter.isDistracterToWordsInDictionaries(EMPTY_PREV_WORD, typedWord)); + } +}