From 33ca0c80c1b0fa6b695bbd9907e8942996b8c0b5 Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi
Date: Fri, 8 Aug 2014 17:01:16 +0900
Subject: [PATCH] Use suggestions in the distracter filter.

Bug: 13142176
Bug: 15531638
Change-Id: I609fa61fccf2a0c738726985806b7d9248eadf49
---
Review changes on top of the change as uploaded (not part of the commit message):
 * loadKeyboardForLocale() now stores the keyboard it builds in mLocaleToKeyboardMap.
   The map was only ever read and cleared, so the cache lookup could never hit.
 * loadKeyboardForLocale() resets mKeyboard when no subtype is found for the locale,
   so that the keyboard loaded for the previous locale is not reused.
 * suggestionExceedsDistracterThreshold() returns the comparison directly.
 * Added Javadoc to the new private helpers.
 * Hunk headers and the diffstat are updated accordingly; the blob ids on the "index"
   line still refer to the change as uploaded.

 .../DistracterFilterCheckingExactMatches.java | 194 +++++++++++++++++-
 .../latin/DistracterFilterTest.java           |  64 +++++-
 2 files changed, 243 insertions(+), 15 deletions(-)

diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatches.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatches.java
index 0ee6236b1..845b73961 100644
--- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatches.java
+++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatches.java
@@ -16,17 +16,28 @@
 
 package com.android.inputmethod.latin.utils;
 
+import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
 import android.content.Context;
+import android.content.res.Resources;
+import android.text.InputType;
 import android.util.Log;
 import android.util.LruCache;
+import android.view.inputmethod.EditorInfo;
 import android.view.inputmethod.InputMethodSubtype;
 
+import com.android.inputmethod.keyboard.Keyboard;
+import com.android.inputmethod.keyboard.KeyboardId;
+import com.android.inputmethod.keyboard.KeyboardLayoutSet;
 import com.android.inputmethod.latin.DictionaryFacilitator;
 import com.android.inputmethod.latin.PrevWordsInfo;
+import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+import com.android.inputmethod.latin.WordComposer;
+import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
 
 /**
  * This class is used to prevent distracters being added to personalization
@@ -40,10 +51,20 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
     private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;
 
     private final Context mContext;
+    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
+    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
     private final DictionaryFacilitator mDictionaryFacilitator;
     private final LruCache<String, Boolean> mDistractersCache;
+    private Keyboard mKeyboard;
     private final Object mLock = new Object();
 
+    // If the score of the top suggestion exceeds this value, the tested word (e.g.,
+    // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distractor to
+    // words in dictionary. The greater the threshold is, the less likely the tested word would
+    // become a distractor, which means the tested word will be more likely to be added to
+    // the dictionary.
+    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;
+
     /**
      * Create a DistracterFilter instance.
      *
@@ -51,8 +72,11 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
      */
     public DistracterFilterCheckingExactMatches(final Context context) {
         mContext = context;
+        mLocaleToSubtypeMap = new HashMap<>();
+        mLocaleToKeyboardMap = new HashMap<>();
         mDictionaryFacilitator = new DictionaryFacilitator();
         mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
+        mKeyboard = null;
     }
 
     @Override
@@ -62,6 +86,65 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
 
     @Override
     public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
+        final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
+        if (enabledSubtypes != null) {
+            for (final InputMethodSubtype subtype : enabledSubtypes) {
+                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
+                if (newLocaleToSubtypeMap.containsKey(locale)) {
+                    // Multiple subtypes are enabled for one locale.
+                    // TODO: Investigate what we should do for this case.
+                    continue;
+                }
+                newLocaleToSubtypeMap.put(locale, subtype);
+            }
+        }
+        if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
+            // Enabled subtypes have not been changed.
+            return;
+        }
+        synchronized (mLock) {
+            mLocaleToSubtypeMap.clear();
+            mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
+            mLocaleToKeyboardMap.clear();
+        }
+    }
+
+    /**
+     * Sets {@code mKeyboard} to the alphabet keyboard of the given locale. A keyboard already
+     * stored in {@code mLocaleToKeyboardMap} is reused; otherwise one is built from the subtype
+     * enabled for the locale and stored for later calls.
+     *
+     * @param newLocale the locale whose keyboard is needed.
+     */
+    private void loadKeyboardForLocale(final Locale newLocale) {
+        final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
+        if (cachedKeyboard != null) {
+            mKeyboard = cachedKeyboard;
+            return;
+        }
+        final InputMethodSubtype subtype;
+        synchronized (mLock) {
+            subtype = mLocaleToSubtypeMap.get(newLocale);
+        }
+        if (subtype == null) {
+            // Do not keep using the keyboard that was loaded for another locale.
+            mKeyboard = null;
+            return;
+        }
+        final EditorInfo editorInfo = new EditorInfo();
+        editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
+        final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
+                mContext, editorInfo);
+        final Resources res = mContext.getResources();
+        final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
+        final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
+        builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
+        builder.setSubtype(subtype);
+        builder.setIsSpellChecker(false /* isSpellChecker */);
+        final KeyboardLayoutSet layoutSet = builder.build();
+        mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
+        // Store the keyboard; without this the lookup above could never succeed.
+        mLocaleToKeyboardMap.put(newLocale, mKeyboard);
     }
 
     private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
@@ -89,6 +172,12 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
         }
         if (!locale.equals(mDictionaryFacilitator.getLocale())) {
             synchronized (mLock) {
+                if (!mLocaleToSubtypeMap.containsKey(locale)) {
+                    Log.e(TAG, "Locale " + locale + " is not enabled.");
+                    // TODO: Investigate what we should do for disabled locales.
+                    return false;
+                }
+                loadKeyboardForLocale(locale);
                 // Reset dictionaries for the locale.
                 try {
                     mDistractersCache.evictAll();
@@ -101,29 +190,122 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
             }
         }
 
+        if (DEBUG) {
+            Log.d(TAG, "testedWord: " + testedWord);
+        }
         final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
         if (isCachedDistracter != null && isCachedDistracter) {
             if (DEBUG) {
-                Log.d(TAG, "testedWord: " + testedWord);
                 Log.d(TAG, "isDistracter: true (cache hit)");
             }
             return true;
         }
+
+        final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
+                checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
+        if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
+            // Add the word to the cache.
+            mDistractersCache.put(testedWord, Boolean.TRUE);
+            return true;
+        }
+        final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
+                false /* ignoreCase */);
+        if (isValidWord) {
+            // Valid word is not a distractor.
+            if (DEBUG) {
+                Log.d(TAG, "isDistracter: false (valid word)");
+            }
+            return false;
+        }
+
+        final boolean isDistracterCheckedByGetSuggestion =
+                checkDistracterUsingGetSuggestions(testedWord);
+        if (isDistracterCheckedByGetSuggestion) {
+            // Add the word to the cache.
+            mDistractersCache.put(testedWord, Boolean.TRUE);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Checks the tested word against the frequencies of its exact matches.
+     *
+     * @param testedWord the word to be tested.
+     * @return true if an exact match has a higher frequency than the tested word itself.
+     */
+    private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
         // The tested word is a distracter when there is a word that is exact matched to the tested
         // word and its probability is higher than the tested word's probability.
         final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
         final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
         final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
         if (DEBUG) {
-            Log.d(TAG, "testedWord: " + testedWord);
             Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
             Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
             Log.d(TAG, "isDistracter: " + isDistracter);
         }
-        if (isDistracter) {
-            // Add the word to the cache.
-            mDistractersCache.put(testedWord, Boolean.TRUE);
-        }
         return isDistracter;
     }
+
+    /**
+     * Checks the tested word against the first suggestion returned for it.
+     *
+     * @param testedWord the word to be tested.
+     * @return true if the normalized score of the first suggestion exceeds
+     * {@link #DISTRACTER_WORD_SCORE_THRESHOLD}; false if no keyboard is loaded or no
+     * suggestion is returned.
+     */
+    private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
+        if (mKeyboard == null) {
+            return false;
+        }
+        final SettingsValuesForSuggestion settingsValuesForSuggestion =
+                new SettingsValuesForSuggestion(false /* blockPotentiallyOffensive */,
+                        false /* spaceAwareGestureEnabled */,
+                        null /* additionalFeaturesSettingValues */);
+        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
+        final String consideredWord = trailingSingleQuotesCount > 0 ?
+                testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
+                testedWord;
+        final WordComposer composer = new WordComposer();
+        final int[] codePoints = StringUtils.toCodePointArray(testedWord);
+
+        synchronized (mLock) {
+            final int[] coordinates = mKeyboard.getCoordinates(codePoints);
+            composer.setComposingWord(codePoints, coordinates);
+            final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
+                    composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, mKeyboard.getProximityInfo(),
+                    settingsValuesForSuggestion, 0 /* sessionId */);
+            if (suggestionResults.isEmpty()) {
+                return false;
+            }
+            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
+            final boolean isDistractor = suggestionExceedsDistracterThreshold(
+                    firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
+            if (DEBUG) {
+                Log.d(TAG, "isDistracter: " + isDistractor);
+            }
+            return isDistractor;
+        }
+    }
+
+    /**
+     * @return true if the score of the suggestion, normalized against the considered word, is
+     * greater than the threshold; false if it is not or if the suggestion is null.
+     */
+    private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
+            final String consideredWord, final float distracterThreshold) {
+        if (suggestion == null) {
+            return false;
+        }
+        final int suggestionScore = suggestion.mScore;
+        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
+                consideredWord, suggestion.mWord, suggestionScore);
+        if (DEBUG) {
+            Log.d(TAG, "normalizedScore: " + normalizedScore);
+            Log.d(TAG, "distracterThreshold: " + distracterThreshold);
+        }
+        return normalizedScore > distracterThreshold;
+    }
 }
diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
index 70b8f530a..6285d16f0 100644
--- a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
+++ b/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java
@@ -16,9 +16,13 @@
 
 package com.android.inputmethod.latin;
 
+import java.util.ArrayList;
 import java.util.Locale;
 
+import android.content.Context;
+import android.test.AndroidTestCase;
 import android.test.suitebuilder.annotation.LargeTest;
+import android.view.inputmethod.InputMethodSubtype;
 
 import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
 
@@ -26,14 +30,24 @@ import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
  * Unit test for DistracterFilter
  */
 @LargeTest
-public class DistracterFilterTest extends InputTestsBase {
+public class DistracterFilterTest extends AndroidTestCase {
     private DistracterFilterCheckingExactMatches mDistracterFilter;
 
     @Override
     protected void setUp() throws Exception {
         super.setUp();
-        mDistracterFilter = new DistracterFilterCheckingExactMatches(getContext());
-        mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest());
+        final Context context = getContext();
+        mDistracterFilter = new DistracterFilterCheckingExactMatches(context);
+        RichInputMethodManager.init(context);
+        final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
+        final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
+        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
+                Locale.US.toString(), "qwerty"));
+        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
+                Locale.FRENCH.toString(), "azerty"));
+        subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
+                Locale.GERMAN.toString(), "qwertz"));
+        mDistracterFilter.updateEnabledSubtypes(subtypes);
     }
 
     public void testIsDistractorToWordsInDictionaries() {
@@ -104,24 +118,56 @@ public class DistracterFilterTest extends InputTestsBase {
         assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
                 EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
 
-        final Locale localeDeDe = new Locale("de", "DE");
-
-        typedWord = "fuer";
-        // For this test case, we consider "fuer" is a distracter to "für".
+        typedWord = "thabk";
+        // For this test case, we consider "thabk" is a distracter to "thank"
         assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
-                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        typedWord = "thanks";
+        // For this test case, we consider "thanks" is not a distracter to any other word
+        // in dictionaries.
+        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        typedWord = "thabks";
+        // For this test case, we consider "thabks" is a distracter to "thanks"
+        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        typedWord = "think";
+        // For this test case, we consider "think" is not a distracter to any other word
+        // in dictionaries.
+        assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        typedWord = "thibk";
+        // For this test case, we consider "thibk" is a distracter to "think"
+        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        typedWord = "tgis";
+        // For this test case, we consider "tgis" is a distracter to "this"
+        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
+
+        final Locale localeDeDe = new Locale("de");
 
         typedWord = "fUEr";
         // For this test case, we consider "fUEr" is a distracter to "für".
         assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                 EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
 
+        typedWord = "fuer";
+        // For this test case, we consider "fuer" is a distracter to "für".
+        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
+                EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
+
         typedWord = "fur";
         // For this test case, we consider "fur" is a distracter to "für".
         assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
                 EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
 
-        final Locale localeFrFr = new Locale("fr", "FR");
+        final Locale localeFrFr = new Locale("fr");
 
         typedWord = "a";
         // For this test case, we consider "a" is a distracter to "à".