Merge "Move case and OOV detection logic into distracter filter."

This commit is contained in:
Keisuke Kuroyanagi 2014-09-10 03:49:09 +00:00 committed by Android (Google) Code Review
commit 934e1d8087
6 changed files with 130 additions and 70 deletions

View file

@ -60,7 +60,6 @@ public class DictionaryFacilitator {
// HACK: This threshold is being used when adding a capitalized entry in the User History // HACK: This threshold is being used when adding a capitalized entry in the User History
// dictionary. // dictionary.
private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140; private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140;
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private DictionaryGroup mDictionaryGroup = new DictionaryGroup(); private DictionaryGroup mDictionaryGroup = new DictionaryGroup();
private boolean mIsUserDictEnabled = false; private boolean mIsUserDictEnabled = false;
@ -68,7 +67,6 @@ public class DictionaryFacilitator {
// To synchronize assigning mDictionaryGroup to ensure closing dictionaries. // To synchronize assigning mDictionaryGroup to ensure closing dictionaries.
private final Object mLock = new Object(); private final Object mLock = new Object();
private final DistracterFilter mDistracterFilter; private final DistracterFilter mDistracterFilter;
private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization;
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS = private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS =
new String[] { new String[] {
@ -176,14 +174,10 @@ public class DictionaryFacilitator {
public DictionaryFacilitator() { public DictionaryFacilitator() {
mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER; mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER;
mFacilitatorCacheForPersonalization = null;
} }
public DictionaryFacilitator(final Context context) { public DictionaryFacilitator(final Context context) {
mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context, mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
mFacilitatorCacheForPersonalization);
} }
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) { public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
@ -358,9 +352,6 @@ public class DictionaryFacilitator {
for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) { for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) {
dictionaryGroup.closeDict(dictType); dictionaryGroup.closeDict(dictType);
} }
if (mFacilitatorCacheForPersonalization != null) {
mFacilitatorCacheForPersonalization.evictAll();
}
mDistracterFilter.close(); mDistracterFilter.close();
} }
@ -609,14 +600,11 @@ public class DictionaryFacilitator {
} }
// TODO: Get locale from personalizationDataChunk.mDetectedLanguage. // TODO: Get locale from personalizationDataChunk.mDetectedLanguage.
final Locale dataChunkLocale = getLocale(); final Locale dataChunkLocale = getLocale();
final DictionaryFacilitator dictionaryFacilitatorForLocale =
mFacilitatorCacheForPersonalization.get(dataChunkLocale);
final ArrayList<LanguageModelParam> languageModelParams = final ArrayList<LanguageModelParam> languageModelParams =
LanguageModelParam.createLanguageModelParamsFrom( LanguageModelParam.createLanguageModelParamsFrom(
personalizationDataChunk.mTokens, personalizationDataChunk.mTokens,
personalizationDataChunk.mTimestampInSeconds, personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
dictionaryFacilitatorForLocale, spacingAndPunctuations, dataChunkLocale, new DistracterFilterCheckingIsInDictionary(
new DistracterFilterCheckingIsInDictionary(
mDistracterFilter, personalizationDict)); mDistracterFilter, personalizationDict));
if (languageModelParams == null || languageModelParams.isEmpty()) { if (languageModelParams == null || languageModelParams.isEmpty()) {
if (callback != null) { if (callback != null) {

View file

@ -36,10 +36,38 @@ public interface DistracterFilter {
public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
final String testedWord, final Locale locale); final String testedWord, final Locale locale);
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
final Locale locale);
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes); public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
public void close(); public void close();
/**
 * Static helpers that pack and unpack the "word handling type" bit field returned by
 * {@code getWordHandlingType}.
 *
 * <p>Two independent flags are encoded in a single {@code int}: whether the word should be
 * lower-cased, and whether it should be handled as an OOV word (presumably
 * "out of vocabulary" - confirm against callers). A value with no bit set means the word
 * needs no special handling.
 */
public static final class HandlingType {
    // No flag set.
    private static final int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0;
    // Bit 0: the word should be lower-cased.
    private static final int SHOULD_BE_LOWER_CASED = 0x1;
    // Bit 1: the word should be handled as an OOV word.
    private static final int SHOULD_BE_HANDLED_AS_OOV = 0x2;

    private HandlingType() {
        // Holder of static helpers and constants only; never instantiated.
    }

    /**
     * Builds the handling-type bit field from its two component flags.
     *
     * @param shouldBeLowerCased whether the lower-case bit should be set.
     * @param isOov whether the OOV bit should be set.
     * @return the combined bit field; {@code 0} when both arguments are {@code false}.
     */
    public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) {
        int wordHandlingType = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS;
        if (shouldBeLowerCased) {
            wordHandlingType |= HandlingType.SHOULD_BE_LOWER_CASED;
        }
        if (isOov) {
            wordHandlingType |= HandlingType.SHOULD_BE_HANDLED_AS_OOV;
        }
        return wordHandlingType;
    }

    /**
     * @param handlingType a bit field produced by {@link #getHandlingType(boolean, boolean)}.
     * @return {@code true} if the lower-case bit is set.
     */
    public static boolean shouldBeLowerCased(final int handlingType) {
        return (handlingType & SHOULD_BE_LOWER_CASED) != 0;
    }

    /**
     * @param handlingType a bit field produced by {@link #getHandlingType(boolean, boolean)}.
     * @return {@code true} if the OOV bit is set.
     */
    public static boolean shouldBeHandledAsOov(final int handlingType) {
        return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0;
    }
}
public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() { public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() {
@Override @Override
public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
@ -47,6 +75,12 @@ public interface DistracterFilter {
return false; return false;
} }
/**
 * The empty filter never requests special handling: neither the lower-case flag nor the
 * OOV flag is set in the returned value.
 */
@Override
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo,
        final String testedWord, final Locale locale) {
    return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */);
}
@Override @Override
public void close() { public void close() {
} }

View file

@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName(); DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
private static final boolean DEBUG = false; private static final boolean DEBUG = false;
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024; private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024;
private final Context mContext; private final Context mContext;
@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
* Create a DistracterFilter instance. * Create a DistracterFilter instance.
* *
* @param context the context. * @param context the context.
* @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for
* checking distracters.
*/ */
public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context, public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) {
mContext = context; mContext = context;
mLocaleToSubtypeCache = new ConcurrentHashMap<>(); mLocaleToSubtypeCache = new ConcurrentHashMap<>();
mLocaleToKeyboardCache = new ConcurrentHashMap<>(); mLocaleToKeyboardCache = new ConcurrentHashMap<>();
mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache; mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE); mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
} }
@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
public void close() { public void close() {
mLocaleToSubtypeCache.clear(); mLocaleToSubtypeCache.clear();
mLocaleToKeyboardCache.clear(); mLocaleToKeyboardCache.clear();
mDistractersCache.evictAll(); mDictionaryFacilitatorLruCache.evictAll();
// Don't clear mDistractersCache.
} }
@Override @Override
@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
mDistractersCache.put(cacheKey, Boolean.TRUE); mDistractersCache.put(cacheKey, Boolean.TRUE);
return true; return true;
} }
final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord, final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */);
false /* ignoreCase */); if (Word) {
if (isValidWord) {
// Valid word is not a distractor. // Valid word is not a distractor.
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "isDistracter: false (valid word)"); Log.d(TAG, "isDistracter: false (valid word)");
@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
} }
return false; return false;
} }
/**
 * Decides whether {@code testedWord} should be stored in its lower-cased form.
 *
 * <p>The word is kept as-is when it is already valid in the dictionaries for
 * {@code locale}, or when lower-casing does not change it. Otherwise it should be
 * lower-cased if the lower-cased form is a valid word, or if only its first letter is
 * capitalized and there is no valid previous-word context.
 *
 * @param prevWordsInfo the previous-words context of the tested word.
 * @param testedWord the word to examine.
 * @param locale the locale used for the dictionary lookup and for lower-casing.
 * @return {@code true} if the word should be lower-cased.
 */
private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord,
        final Locale locale) {
    final DictionaryFacilitator facilitatorForLocale =
            mDictionaryFacilitatorLruCache.get(locale);
    // A word that is valid exactly as typed keeps its casing.
    if (facilitatorForLocale.isValidWord(testedWord, false /* ignoreCase */)) {
        return false;
    }
    final String lowerCasedWord = testedWord.toLowerCase(locale);
    // Nothing to do when the word has no upper-case letters to begin with.
    if (lowerCasedWord.equals(testedWord)) {
        return false;
    }
    // TODO: Check beginning-of-sentence.
    return facilitatorForLocale.isValidWord(lowerCasedWord, false /* ignoreCase */)
            || (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST
                    && !prevWordsInfo.isValid());
}
/**
 * Computes the handling type of {@code testedWord}: whether it should be lower-cased, and
 * whether the (possibly lower-cased) word is missing from the dictionaries for
 * {@code locale} and must therefore be handled as OOV.
 *
 * @param prevWordsInfo the previous-words context of the tested word.
 * @param testedWord the word to examine; {@code null} yields "no special handling".
 * @param locale the locale to look the word up in; {@code null} yields
 *         "no special handling".
 * @return a bit field to be decoded with the {@code HandlingType} helpers.
 */
@Override
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
        final Locale locale) {
    // TODO: Use this method for user history dictionary.
    if (testedWord != null && locale != null) {
        final boolean lowerCaseNeeded = shouldBeLowerCased(prevWordsInfo, testedWord, locale);
        final String wordToLookUp;
        if (lowerCaseNeeded) {
            wordToLookUp = testedWord.toLowerCase(locale);
        } else {
            wordToLookUp = testedWord;
        }
        // The OOV decision is made on the word as it would actually be stored.
        final boolean foundInDictionaries = mDictionaryFacilitatorLruCache.get(locale)
                .isValidWord(wordToLookUp, false /* ignoreCase */);
        return HandlingType.getHandlingType(lowerCaseNeeded, !foundInDictionaries);
    }
    return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */);
}
} }

View file

@ -47,6 +47,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter
} }
} }
/**
 * Forwards the request unchanged to the wrapped distracter filter and returns its answer.
 */
@Override
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
        final Locale locale) {
    final int delegatedHandlingType =
            mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale);
    return delegatedHandlingType;
}
@Override @Override
public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) { public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
// Do nothing. // Do nothing.

View file

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -81,8 +82,7 @@ public final class LanguageModelParam {
// Process a list of words and return a list of {@link LanguageModelParam} objects. // Process a list of words and return a list of {@link LanguageModelParam} objects.
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom( public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final List<String> tokens, final int timestamp, final List<String> tokens, final int timestamp,
final DictionaryFacilitator dictionaryFacilitator, final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
final SpacingAndPunctuations spacingAndPunctuations,
final DistracterFilter distracterFilter) { final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>(); final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
final int N = tokens.size(); final int N = tokens.size();
@ -111,8 +111,7 @@ public final class LanguageModelParam {
} }
final LanguageModelParam languageModelParam = final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam( detectWhetherVaildWordOrNotAndGetLanguageModelParam(
prevWordsInfo, tempWord, timestamp, dictionaryFacilitator, prevWordsInfo, tempWord, timestamp, locale, distracterFilter);
distracterFilter);
if (languageModelParam == null) { if (languageModelParam == null) {
continue; continue;
} }
@ -125,47 +124,25 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
final DictionaryFacilitator dictionaryFacilitator, final Locale locale, final DistracterFilter distracterFilter) {
final DistracterFilter distracterFilter) {
final Locale locale = dictionaryFacilitator.getLocale();
if (locale == null) { if (locale == null) {
return null; return null;
} }
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo,
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp, targetWord, locale);
true /* isValidWord */, locale, distracterFilter); final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
targetWord.toLowerCase(locale) : targetWord;
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) {
// The word is a distracter.
return null;
} }
return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp,
final String lowerCaseTargetWord = targetWord.toLowerCase(locale); !HandlingType.shouldBeHandledAsOov(wordHandlingType));
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
// Add the lower-cased word.
return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
timestamp, true /* isValidWord */, locale, distracterFilter);
}
// Treat the word as an OOV word.
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
false /* isValidWord */, locale, distracterFilter);
} }
private static LanguageModelParam createAndGetLanguageModelParamOfWord( private static LanguageModelParam createAndGetLanguageModelParamOfWord(
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, final PrevWordsInfo prevWordsInfo, final String word, final int timestamp,
final boolean isValidWord, final Locale locale, final boolean isValidWord) {
final DistracterFilter distracterFilter) {
final String word;
if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
&& !prevWordsInfo.isValid() && !isValidWord) {
word = targetWord.toLowerCase(locale);
} else {
word = targetWord;
}
// Check whether the word is a distracter to words in the dictionaries.
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) {
if (DEBUG) {
Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word.");
}
return null;
}
final int unigramProbability = isValidWord ? final int unigramProbability = isValidWord ?
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD; UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
if (!prevWordsInfo.isValid()) { if (!prevWordsInfo.isValid()) {

View file

@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
package com.android.inputmethod.latin; package com.android.inputmethod.latin.utils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Locale; import java.util.Locale;
@ -24,24 +24,22 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest; import android.test.suitebuilder.annotation.LargeTest;
import android.view.inputmethod.InputMethodSubtype; import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions; import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.RichInputMethodManager;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
/** /**
* Unit test for DistracterFilter * Unit test for DistracterFilter
*/ */
@LargeTest @LargeTest
public class DistracterFilterTest extends AndroidTestCase { public class DistracterFilterTest extends AndroidTestCase {
private DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache;
private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter; private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter;
@Override @Override
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
final Context context = getContext(); final Context context = getContext();
mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context, mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
2 /* maxSize */, "" /* dictionaryNamePrefix */);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
mDictionaryFacilitatorLruCache);
RichInputMethodManager.init(context); RichInputMethodManager.init(context);
final RichInputMethodManager richImm = RichInputMethodManager.getInstance(); final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>(); final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
@ -56,7 +54,7 @@ public class DistracterFilterTest extends AndroidTestCase {
@Override @Override
protected void tearDown() { protected void tearDown() {
mDictionaryFacilitatorLruCache.evictAll(); mDistracterFilter.close();
} }
public void testIsDistractorToWordsInDictionaries() { public void testIsDistractorToWordsInDictionaries() {
@ -203,4 +201,25 @@ public class DistracterFilterTest extends AndroidTestCase {
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr)); EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr));
} }
/**
 * Checks the handling type reported for three en_US probes with no previous-word context:
 * a lower-case word, its capitalized form, and a misspelling.
 */
public void testGetWordHandlingType() {
    final Locale enUs = new Locale("en", "US");
    final PrevWordsInfo noPrevWords = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;

    // "this": expected to need neither lower-casing nor OOV handling.
    final int typeForLowerCaseWord =
            mDistracterFilter.getWordHandlingType(noPrevWords, "this", enUs);
    assertFalse(HandlingType.shouldBeLowerCased(typeForLowerCaseWord));
    assertFalse(HandlingType.shouldBeHandledAsOov(typeForLowerCaseWord));

    // "This": expected to be lower-cased, and not OOV once lower-cased.
    final int typeForCapitalizedWord =
            mDistracterFilter.getWordHandlingType(noPrevWords, "This", enUs);
    assertTrue(HandlingType.shouldBeLowerCased(typeForCapitalizedWord));
    assertFalse(HandlingType.shouldBeHandledAsOov(typeForCapitalizedWord));

    // "thibk": expected to keep its casing and be handled as OOV.
    final int typeForMisspelling =
            mDistracterFilter.getWordHandlingType(noPrevWords, "thibk", enUs);
    assertFalse(HandlingType.shouldBeLowerCased(typeForMisspelling));
    assertTrue(HandlingType.shouldBeHandledAsOov(typeForMisspelling));
}
} }