Merge "Use suggestions in the distracter filter." into lmp-dev

This commit is contained in:
Keisuke Kuroyanagi 2014-08-08 08:05:11 +00:00 committed by Android (Google) Code Review
commit 6fef4ff00d
2 changed files with 217 additions and 15 deletions

View file

@ -16,17 +16,28 @@
package com.android.inputmethod.latin.utils; package com.android.inputmethod.latin.utils;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import android.content.Context; import android.content.Context;
import android.content.res.Resources;
import android.text.InputType;
import android.util.Log; import android.util.Log;
import android.util.LruCache; import android.util.LruCache;
import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputMethodSubtype; import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.KeyboardId;
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
/** /**
* This class is used to prevent distracters being added to personalization * This class is used to prevent distracters being added to personalization
@ -40,10 +51,20 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
private static final int MAX_DISTRACTERS_CACHE_SIZE = 512; private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;
private final Context mContext; private final Context mContext;
private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
private final DictionaryFacilitator mDictionaryFacilitator; private final DictionaryFacilitator mDictionaryFacilitator;
private final LruCache<String, Boolean> mDistractersCache; private final LruCache<String, Boolean> mDistractersCache;
private Keyboard mKeyboard;
private final Object mLock = new Object(); private final Object mLock = new Object();
// If the normalized score of the top suggestion exceeds this value, the tested word (e.g.,
// an OOV, a misspelling, or an in-vocabulary word) is considered a distracter to
// words in the dictionary. The greater the threshold is, the less likely the tested word is
// to be treated as a distracter, which means the tested word is more likely to be added to
// the dictionary.
private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;
/** /**
* Create a DistracterFilter instance. * Create a DistracterFilter instance.
* *
@ -51,8 +72,11 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
*/ */
public DistracterFilterCheckingExactMatches(final Context context) { public DistracterFilterCheckingExactMatches(final Context context) {
mContext = context; mContext = context;
// Both maps start empty; the subtype map is filled by updateEnabledSubtypes().
mLocaleToSubtypeMap = new HashMap<>();
mLocaleToKeyboardMap = new HashMap<>();
mDictionaryFacilitator = new DictionaryFacilitator(); mDictionaryFacilitator = new DictionaryFacilitator();
// The distracter cache holds at most MAX_DISTRACTERS_CACHE_SIZE entries.
mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
// No keyboard is available until loadKeyboardForLocale() assigns one.
mKeyboard = null;
} }
@Override @Override
@ -62,6 +86,54 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
/**
 * Rebuilds the locale-to-subtype map from the given enabled subtypes.
 *
 * A null list is handled like an empty one. When several subtypes share a locale, only the
 * first one encountered is kept. If the resulting map equals the current one, nothing is
 * changed; otherwise the current map is replaced and the per-locale keyboard map is cleared,
 * both while holding {@code mLock}.
 *
 * @param enabledSubtypes the currently enabled subtypes; may be null.
 */
@Override @Override
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
if (enabledSubtypes != null) {
for (final InputMethodSubtype subtype : enabledSubtypes) {
final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
if (newLocaleToSubtypeMap.containsKey(locale)) {
// Multiple subtypes are enabled for one locale.
// TODO: Investigate what we should do for this case.
continue;
}
newLocaleToSubtypeMap.put(locale, subtype);
}
}
// NOTE(review): this comparison reads mLocaleToSubtypeMap without holding mLock, unlike the
// update below — confirm that callers never race with it.
if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
// Enabled subtypes have not been changed.
return;
}
synchronized (mLock) {
mLocaleToSubtypeMap.clear();
mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
// Keyboards loaded for the previous set of subtypes are dropped.
mLocaleToKeyboardMap.clear();
}
}
private void loadKeyboardForLocale(final Locale newLocale) {
final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
if (cachedKeyboard != null) {
mKeyboard = cachedKeyboard;
return;
}
final InputMethodSubtype subtype;
synchronized (mLock) {
subtype = mLocaleToSubtypeMap.get(newLocale);
}
if (subtype == null) {
return;
}
final EditorInfo editorInfo = new EditorInfo();
editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
mContext, editorInfo);
final Resources res = mContext.getResources();
final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
builder.setSubtype(subtype);
builder.setIsSpellChecker(false /* isSpellChecker */);
final KeyboardLayoutSet layoutSet = builder.build();
mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
} }
private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException { private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
@ -89,6 +161,12 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
} }
if (!locale.equals(mDictionaryFacilitator.getLocale())) { if (!locale.equals(mDictionaryFacilitator.getLocale())) {
synchronized (mLock) { synchronized (mLock) {
if (!mLocaleToSubtypeMap.containsKey(locale)) {
Log.e(TAG, "Locale " + locale + " is not enabled.");
// TODO: Investigate what we should do for disabled locales.
return false;
}
loadKeyboardForLocale(locale);
// Reset dictionaries for the locale. // Reset dictionaries for the locale.
try { try {
mDistractersCache.evictAll(); mDistractersCache.evictAll();
@ -101,29 +179,107 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
} }
} }
if (DEBUG) {
Log.d(TAG, "testedWord: " + testedWord);
}
final Boolean isCachedDistracter = mDistractersCache.get(testedWord); final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
if (isCachedDistracter != null && isCachedDistracter) { if (isCachedDistracter != null && isCachedDistracter) {
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "testedWord: " + testedWord);
Log.d(TAG, "isDistracter: true (cache hit)"); Log.d(TAG, "isDistracter: true (cache hit)");
} }
return true; return true;
} }
final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
// Add the word to the cache.
mDistractersCache.put(testedWord, Boolean.TRUE);
return true;
}
final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
false /* ignoreCase */);
if (isValidWord) {
// Valid word is not a distractor.
if (DEBUG) {
Log.d(TAG, "isDistracter: false (valid word)");
}
return false;
}
final boolean isDistracterCheckedByGetSuggestion =
checkDistracterUsingGetSuggestions(testedWord);
if (isDistracterCheckedByGetSuggestion) {
// Add the word to the cache.
mDistractersCache.put(testedWord, Boolean.TRUE);
return true;
}
return false;
}
/**
 * Checks whether the tested word is a distracter by comparing its own frequency with the
 * maximum frequency among its exact matches, both obtained from the dictionary facilitator.
 *
 * @param testedWord the word to check.
 * @return true if the frequency of the tested word is lower than the maximum frequency of
 * its exact matches.
 */
private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
// The tested word is a distracter when there is a word that is exact matched to the tested // The tested word is a distracter when there is a word that is exact matched to the tested
// word and its probability is higher than the tested word's probability. // word and its probability is higher than the tested word's probability.
final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord); final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord); final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
final boolean isDistracter = perfectMatchFreq < exactMatchFreq; final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "testedWord: " + testedWord);
Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq); Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
Log.d(TAG, "exactMatchFreq: " + exactMatchFreq); Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
Log.d(TAG, "isDistracter: " + isDistracter); Log.d(TAG, "isDistracter: " + isDistracter);
} }
if (isDistracter) {
// Add the word to the cache.
mDistractersCache.put(testedWord, Boolean.TRUE);
}
return isDistracter; return isDistracter;
} }
/**
 * Checks whether the tested word is a distracter by requesting suggestions for it and
 * comparing the first suggestion against {@code DISTRACTER_WORD_SCORE_THRESHOLD}.
 *
 * The word is composed with the key coordinates of the current keyboard and looked up with
 * empty previous-word information. Trailing single quotes are removed from the word that is
 * compared with the suggestion, but not from the word that is composed.
 *
 * @param testedWord the word to check.
 * @return false when no keyboard is loaded or no suggestion is returned; otherwise whether
 * the first suggestion exceeds the distracter threshold.
 */
private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
    if (mKeyboard == null) {
        return false;
    }
    final SettingsValuesForSuggestion suggestionSettings = new SettingsValuesForSuggestion(
            false /* blockPotentiallyOffensive */,
            false /* spaceAwareGestureEnabled */,
            null /* additionalFeaturesSettingValues */);
    // The word compared with the suggestion has its trailing single quotes dropped.
    final int quoteCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
    final String consideredWord;
    if (quoteCount > 0) {
        consideredWord = testedWord.substring(0, testedWord.length() - quoteCount);
    } else {
        consideredWord = testedWord;
    }
    final WordComposer wordComposer = new WordComposer();
    final int[] codePoints = StringUtils.toCodePointArray(testedWord);
    synchronized (mLock) {
        final int[] keyCoordinates = mKeyboard.getCoordinates(codePoints);
        wordComposer.setComposingWord(codePoints, keyCoordinates);
        final SuggestionResults results = mDictionaryFacilitator.getSuggestionResults(
                wordComposer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO,
                mKeyboard.getProximityInfo(), suggestionSettings, 0 /* sessionId */);
        if (results.isEmpty()) {
            return false;
        }
        final SuggestedWordInfo topSuggestion = results.first();
        final boolean exceedsThreshold = suggestionExceedsDistracterThreshold(
                topSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
        if (DEBUG) {
            Log.d(TAG, "isDistracter: " + exceedsThreshold);
        }
        return exceedsThreshold;
    }
}
/**
 * Tells whether the normalized score of a suggestion is strictly greater than a threshold.
 *
 * @param suggestion the suggestion to evaluate; may be null.
 * @param consideredWord the word the suggestion is scored against.
 * @param distracterThreshold the threshold the normalized score is compared with.
 * @return false when the suggestion is null; otherwise whether the normalized score computed
 * by {@code BinaryDictionaryUtils.calcNormalizedScore} exceeds the threshold.
 */
private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
        final String consideredWord, final float distracterThreshold) {
    if (suggestion != null) {
        final int rawScore = suggestion.mScore;
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                consideredWord, suggestion.mWord, rawScore);
        if (DEBUG) {
            Log.d(TAG, "normalizedScore: " + normalizedScore);
            Log.d(TAG, "distracterThreshold: " + distracterThreshold);
        }
        return normalizedScore > distracterThreshold;
    }
    return false;
}
} }

View file

@ -16,9 +16,13 @@
package com.android.inputmethod.latin; package com.android.inputmethod.latin;
import java.util.ArrayList;
import java.util.Locale; import java.util.Locale;
import android.content.Context;
import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest; import android.test.suitebuilder.annotation.LargeTest;
import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches; import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
@ -26,14 +30,24 @@ import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
* Unit test for DistracterFilter * Unit test for DistracterFilter
*/ */
@LargeTest @LargeTest
public class DistracterFilterTest extends InputTestsBase { public class DistracterFilterTest extends AndroidTestCase {
private DistracterFilterCheckingExactMatches mDistracterFilter; private DistracterFilterCheckingExactMatches mDistracterFilter;
@Override @Override
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
mDistracterFilter = new DistracterFilterCheckingExactMatches(getContext()); final Context context = getContext();
mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest()); mDistracterFilter = new DistracterFilterCheckingExactMatches(context);
RichInputMethodManager.init(context);
final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
Locale.US.toString(), "qwerty"));
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
Locale.FRENCH.toString(), "azerty"));
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
Locale.GERMAN.toString(), "qwertz"));
mDistracterFilter.updateEnabledSubtypes(subtypes);
} }
public void testIsDistractorToWordsInDictionaries() { public void testIsDistractorToWordsInDictionaries() {
@ -104,24 +118,56 @@ public class DistracterFilterTest extends InputTestsBase {
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries( assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs)); EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
final Locale localeDeDe = new Locale("de", "DE"); typedWord = "thabk";
// For this test case, we consider "thabk" is a distracter to "thank"
typedWord = "fuer";
// For this test case, we consider "fuer" is a distracter to "für".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe)); EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "thanks";
// For this test case, we consider "thanks" is not a distracter to any other word
// in dictionaries.
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "thabks";
// For this test case, we consider "thabks" is a distracter to "thanks"
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "think";
// For this test case, we consider "think" is not a distracter to any other word
// in dictionaries.
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "thibk";
// For this test case, we consider "thibk" is a distracter to "think"
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "tgis";
// For this test case, we consider "tgis" is a distracter to "this"
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
final Locale localeDeDe = new Locale("de");
typedWord = "fUEr"; typedWord = "fUEr";
// For this test case, we consider "fUEr" is a distracter to "für". // For this test case, we consider "fUEr" is a distracter to "für".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe)); EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
typedWord = "fuer";
// For this test case, we consider "fuer" is a distracter to "für".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
typedWord = "fur"; typedWord = "fur";
// For this test case, we consider "fur" is a distracter to "für". // For this test case, we consider "fur" is a distracter to "für".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe)); EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
final Locale localeFrFr = new Locale("fr", "FR"); final Locale localeFrFr = new Locale("fr");
typedWord = "a"; typedWord = "a";
// For this test case, we consider "a" is a distracter to "à". // For this test case, we consider "a" is a distracter to "à".