Merge "Use suggestions in the distracter filter." into lmp-dev
This commit is contained in:
commit
6fef4ff00d
2 changed files with 217 additions and 15 deletions
|
@ -16,17 +16,28 @@
|
|||
|
||||
package com.android.inputmethod.latin.utils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import android.content.Context;
|
||||
import android.content.res.Resources;
|
||||
import android.text.InputType;
|
||||
import android.util.Log;
|
||||
import android.util.LruCache;
|
||||
import android.view.inputmethod.EditorInfo;
|
||||
import android.view.inputmethod.InputMethodSubtype;
|
||||
|
||||
import com.android.inputmethod.keyboard.Keyboard;
|
||||
import com.android.inputmethod.keyboard.KeyboardId;
|
||||
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
|
||||
import com.android.inputmethod.latin.DictionaryFacilitator;
|
||||
import com.android.inputmethod.latin.PrevWordsInfo;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.WordComposer;
|
||||
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
|
||||
|
||||
/**
|
||||
* This class is used to prevent distracters being added to personalization
|
||||
|
@ -40,10 +51,20 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
|
|||
private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;
|
||||
|
||||
private final Context mContext;
|
||||
private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
|
||||
private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
|
||||
private final DictionaryFacilitator mDictionaryFacilitator;
|
||||
private final LruCache<String, Boolean> mDistractersCache;
|
||||
private Keyboard mKeyboard;
|
||||
private final Object mLock = new Object();
|
||||
|
||||
// If the score of the top suggestion exceeds this value, the tested word (e.g.,
|
||||
// an OOV, a misspelling, or an in-vocabulary word) would be considered as a distractor to
|
||||
// words in dictionary. The greater the threshold is, the less likely the tested word would
|
||||
// become a distractor, which means the tested word will be more likely to be added to
|
||||
// the dictionary.
|
||||
private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;
|
||||
|
||||
/**
|
||||
* Create a DistracterFilter instance.
|
||||
*
|
||||
|
@ -51,8 +72,11 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
|
|||
*/
|
||||
public DistracterFilterCheckingExactMatches(final Context context) {
    // No keyboard is selected at construction time; loadKeyboardForLocale assigns one later.
    this.mKeyboard = null;
    this.mContext = context;
    // Collaborators owned by this filter.
    this.mDictionaryFacilitator = new DictionaryFacilitator();
    this.mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
    // Per-locale lookup tables start out empty; updateEnabledSubtypes fills the subtype table.
    this.mLocaleToSubtypeMap = new HashMap<>();
    this.mLocaleToKeyboardMap = new HashMap<>();
}
|
||||
|
||||
@Override
|
||||
|
@ -62,6 +86,54 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
|
|||
|
||||
@Override
|
||||
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
|
||||
final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
|
||||
if (enabledSubtypes != null) {
|
||||
for (final InputMethodSubtype subtype : enabledSubtypes) {
|
||||
final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
|
||||
if (newLocaleToSubtypeMap.containsKey(locale)) {
|
||||
// Multiple subtypes are enabled for one locale.
|
||||
// TODO: Investigate what we should do for this case.
|
||||
continue;
|
||||
}
|
||||
newLocaleToSubtypeMap.put(locale, subtype);
|
||||
}
|
||||
}
|
||||
if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
|
||||
// Enabled subtypes have not been changed.
|
||||
return;
|
||||
}
|
||||
synchronized (mLock) {
|
||||
mLocaleToSubtypeMap.clear();
|
||||
mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
|
||||
mLocaleToKeyboardMap.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private void loadKeyboardForLocale(final Locale newLocale) {
|
||||
final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
|
||||
if (cachedKeyboard != null) {
|
||||
mKeyboard = cachedKeyboard;
|
||||
return;
|
||||
}
|
||||
final InputMethodSubtype subtype;
|
||||
synchronized (mLock) {
|
||||
subtype = mLocaleToSubtypeMap.get(newLocale);
|
||||
}
|
||||
if (subtype == null) {
|
||||
return;
|
||||
}
|
||||
final EditorInfo editorInfo = new EditorInfo();
|
||||
editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
|
||||
final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
|
||||
mContext, editorInfo);
|
||||
final Resources res = mContext.getResources();
|
||||
final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
|
||||
final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
|
||||
builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
|
||||
builder.setSubtype(subtype);
|
||||
builder.setIsSpellChecker(false /* isSpellChecker */);
|
||||
final KeyboardLayoutSet layoutSet = builder.build();
|
||||
mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
|
||||
}
|
||||
|
||||
private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
|
||||
|
@ -89,6 +161,12 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
|
|||
}
|
||||
if (!locale.equals(mDictionaryFacilitator.getLocale())) {
|
||||
synchronized (mLock) {
|
||||
if (!mLocaleToSubtypeMap.containsKey(locale)) {
|
||||
Log.e(TAG, "Locale " + locale + " is not enabled.");
|
||||
// TODO: Investigate what we should do for disabled locales.
|
||||
return false;
|
||||
}
|
||||
loadKeyboardForLocale(locale);
|
||||
// Reset dictionaries for the locale.
|
||||
try {
|
||||
mDistractersCache.evictAll();
|
||||
|
@ -101,29 +179,107 @@ public class DistracterFilterCheckingExactMatches implements DistracterFilter {
|
|||
}
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "testedWord: " + testedWord);
|
||||
}
|
||||
final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
|
||||
if (isCachedDistracter != null && isCachedDistracter) {
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "testedWord: " + testedWord);
|
||||
Log.d(TAG, "isDistracter: true (cache hit)");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
|
||||
checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
|
||||
if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
|
||||
// Add the word to the cache.
|
||||
mDistractersCache.put(testedWord, Boolean.TRUE);
|
||||
return true;
|
||||
}
|
||||
final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
|
||||
false /* ignoreCase */);
|
||||
if (isValidWord) {
|
||||
// Valid word is not a distractor.
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "isDistracter: false (valid word)");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
final boolean isDistracterCheckedByGetSuggestion =
|
||||
checkDistracterUsingGetSuggestions(testedWord);
|
||||
if (isDistracterCheckedByGetSuggestion) {
|
||||
// Add the word to the cache.
|
||||
mDistractersCache.put(testedWord, Boolean.TRUE);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
|
||||
// The tested word is a distracter when there is a word that is exact matched to the tested
|
||||
// word and its probability is higher than the tested word's probability.
|
||||
final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
|
||||
final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
|
||||
final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "testedWord: " + testedWord);
|
||||
Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
|
||||
Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
|
||||
Log.d(TAG, "isDistracter: " + isDistracter);
|
||||
}
|
||||
if (isDistracter) {
|
||||
// Add the word to the cache.
|
||||
mDistractersCache.put(testedWord, Boolean.TRUE);
|
||||
}
|
||||
return isDistracter;
|
||||
}
|
||||
|
||||
private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
|
||||
if (mKeyboard == null) {
|
||||
return false;
|
||||
}
|
||||
final SettingsValuesForSuggestion settingsValuesForSuggestion =
|
||||
new SettingsValuesForSuggestion(false /* blockPotentiallyOffensive */,
|
||||
false /* spaceAwareGestureEnabled */,
|
||||
null /* additionalFeaturesSettingValues */);
|
||||
final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
|
||||
final String consideredWord = trailingSingleQuotesCount > 0 ?
|
||||
testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
|
||||
testedWord;
|
||||
final WordComposer composer = new WordComposer();
|
||||
final int[] codePoints = StringUtils.toCodePointArray(testedWord);
|
||||
|
||||
synchronized (mLock) {
|
||||
final int[] coordinates = mKeyboard.getCoordinates(codePoints);
|
||||
composer.setComposingWord(codePoints, coordinates);
|
||||
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
|
||||
composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, mKeyboard.getProximityInfo(),
|
||||
settingsValuesForSuggestion, 0 /* sessionId */);
|
||||
if (suggestionResults.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
final SuggestedWordInfo firstSuggestion = suggestionResults.first();
|
||||
final boolean isDistractor = suggestionExceedsDistracterThreshold(
|
||||
firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "isDistracter: " + isDistractor);
|
||||
}
|
||||
return isDistractor;
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
|
||||
final String consideredWord, final float distracterThreshold) {
|
||||
if (suggestion == null) {
|
||||
return false;
|
||||
}
|
||||
final int suggestionScore = suggestion.mScore;
|
||||
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
|
||||
consideredWord, suggestion.mWord, suggestionScore);
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "normalizedScore: " + normalizedScore);
|
||||
Log.d(TAG, "distracterThreshold: " + distracterThreshold);
|
||||
}
|
||||
if (normalizedScore > distracterThreshold) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,9 +16,13 @@
|
|||
|
||||
package com.android.inputmethod.latin;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Locale;
|
||||
|
||||
import android.content.Context;
|
||||
import android.test.AndroidTestCase;
|
||||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.view.inputmethod.InputMethodSubtype;
|
||||
|
||||
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
|
||||
|
||||
|
@ -26,14 +30,24 @@ import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatches;
|
|||
* Unit test for DistracterFilter
|
||||
*/
|
||||
@LargeTest
|
||||
public class DistracterFilterTest extends InputTestsBase {
|
||||
public class DistracterFilterTest extends AndroidTestCase {
|
||||
private DistracterFilterCheckingExactMatches mDistracterFilter;
|
||||
|
||||
/**
 * Creates the distracter filter under test and hands it a list of enabled subtypes looked up
 * through RichInputMethodManager: Locale.US with "qwerty", Locale.FRENCH with "azerty" and
 * Locale.GERMAN with "qwertz".
 */
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
// NOTE(review): the next two statements look like the pre-change setup shown alongside the
// new one (this view carries no +/- diff markers); the filter is assigned again below.
// Confirm against the actual file before relying on them.
mDistracterFilter = new DistracterFilterCheckingExactMatches(getContext());
|
||||
mDistracterFilter.updateEnabledSubtypes(mLatinIME.getEnabledSubtypesForTest());
|
||||
final Context context = getContext();
|
||||
mDistracterFilter = new DistracterFilterCheckingExactMatches(context);
|
||||
// RichInputMethodManager is initialized with the context before its instance is fetched.
RichInputMethodManager.init(context);
|
||||
final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
|
||||
final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
|
||||
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
|
||||
Locale.US.toString(), "qwerty"));
|
||||
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
|
||||
Locale.FRENCH.toString(), "azerty"));
|
||||
subtypes.add(richImm.findSubtypeByLocaleAndKeyboardLayoutSet(
|
||||
Locale.GERMAN.toString(), "qwertz"));
|
||||
// Register the collected subtypes with the filter under test.
mDistracterFilter.updateEnabledSubtypes(subtypes);
|
||||
}
|
||||
|
||||
public void testIsDistractorToWordsInDictionaries() {
|
||||
|
@ -104,24 +118,56 @@ public class DistracterFilterTest extends InputTestsBase {
|
|||
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
final Locale localeDeDe = new Locale("de", "DE");
|
||||
|
||||
typedWord = "fuer";
|
||||
// For this test case, we consider "fuer" is a distracter to "für".
|
||||
typedWord = "thabk";
|
||||
// For this test case, we consider "thabk" is a distracter to "thank"
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "thanks";
|
||||
// For this test case, we consider "thanks" is not a distracter to any other word
|
||||
// in dictionaries.
|
||||
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "thabks";
|
||||
// For this test case, we consider "thabks" is a distracter to "thanks"
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "think";
|
||||
// For this test case, we consider "think" is not a distracter to any other word
|
||||
// in dictionaries.
|
||||
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "thibk";
|
||||
// For this test case, we consider "thibk" is a distracter to "think"
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "tgis";
|
||||
// For this test case, we consider "tgis" is a distracter to "this"
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
final Locale localeDeDe = new Locale("de");
|
||||
|
||||
typedWord = "fUEr";
|
||||
// For this test case, we consider "fUEr" is a distracter to "für".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
|
||||
|
||||
typedWord = "fuer";
|
||||
// For this test case, we consider "fuer" is a distracter to "für".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
|
||||
|
||||
typedWord = "fur";
|
||||
// For this test case, we consider "fur" is a distracter to "für".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeDeDe));
|
||||
|
||||
final Locale localeFrFr = new Locale("fr", "FR");
|
||||
final Locale localeFrFr = new Locale("fr");
|
||||
|
||||
typedWord = "a";
|
||||
// For this test case, we consider "a" is a distracter to "à".
|
||||
|
|
Loading…
Reference in a new issue