Merge "Move case and OOV detection logic into distracter filter."

This commit is contained in:
Keisuke Kuroyanagi 2014-09-10 03:49:09 +00:00 committed by Android (Google) Code Review
commit 934e1d8087
6 changed files with 130 additions and 70 deletions

View file

@ -60,7 +60,6 @@ public class DictionaryFacilitator {
// HACK: This threshold is being used when adding a capitalized entry in the User History
// dictionary.
private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140;
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private DictionaryGroup mDictionaryGroup = new DictionaryGroup();
private boolean mIsUserDictEnabled = false;
@ -68,7 +67,6 @@ public class DictionaryFacilitator {
// To synchronize assigning mDictionaryGroup to ensure closing dictionaries.
private final Object mLock = new Object();
private final DistracterFilter mDistracterFilter;
private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization;
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS =
new String[] {
@ -176,14 +174,10 @@ public class DictionaryFacilitator {
public DictionaryFacilitator() {
mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER;
mFacilitatorCacheForPersonalization = null;
}
public DictionaryFacilitator(final Context context) {
mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context,
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
mFacilitatorCacheForPersonalization);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
}
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
@ -358,9 +352,6 @@ public class DictionaryFacilitator {
for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) {
dictionaryGroup.closeDict(dictType);
}
if (mFacilitatorCacheForPersonalization != null) {
mFacilitatorCacheForPersonalization.evictAll();
}
mDistracterFilter.close();
}
@ -609,14 +600,11 @@ public class DictionaryFacilitator {
}
// TODO: Get locale from personalizationDataChunk.mDetectedLanguage.
final Locale dataChunkLocale = getLocale();
final DictionaryFacilitator dictionaryFacilitatorForLocale =
mFacilitatorCacheForPersonalization.get(dataChunkLocale);
final ArrayList<LanguageModelParam> languageModelParams =
LanguageModelParam.createLanguageModelParamsFrom(
personalizationDataChunk.mTokens,
personalizationDataChunk.mTimestampInSeconds,
dictionaryFacilitatorForLocale, spacingAndPunctuations,
new DistracterFilterCheckingIsInDictionary(
personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
dataChunkLocale, new DistracterFilterCheckingIsInDictionary(
mDistracterFilter, personalizationDict));
if (languageModelParams == null || languageModelParams.isEmpty()) {
if (callback != null) {

View file

@ -36,10 +36,38 @@ public interface DistracterFilter {
public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
final String testedWord, final Locale locale);
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
final Locale locale);
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
public void close();
/**
 * Bit flags describing how a word should be handled, packed into a single {@code int}.
 *
 * <p>Bit {@code 0x1} is set when the word should be lower-cased and bit {@code 0x2} is set
 * when the word should be handled as an OOV word. {@code 0x0} means that no special handling
 * is required. Build a value with {@link #getHandlingType(boolean, boolean)} and read it
 * back with {@link #shouldBeLowerCased(int)} and {@link #shouldBeHandledAsOov(int)}.
 */
public static final class HandlingType {
    private static final int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0;
    private static final int SHOULD_BE_LOWER_CASED = 0x1;
    private static final int SHOULD_BE_HANDLED_AS_OOV = 0x2;

    private HandlingType() {
        // This utility class is not publicly instantiable.
    }

    /**
     * Packs the two handling decisions into a single handling type value.
     *
     * @param shouldBeLowerCased whether the lower-case flag should be set.
     * @param isOov whether the OOV flag should be set.
     * @return the combined flags; {@code 0x0} when both arguments are false.
     */
    public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) {
        int wordHandlingType = REQUIRE_NO_SPECIAL_HANDLINGS;
        if (shouldBeLowerCased) {
            wordHandlingType |= SHOULD_BE_LOWER_CASED;
        }
        if (isOov) {
            wordHandlingType |= SHOULD_BE_HANDLED_AS_OOV;
        }
        return wordHandlingType;
    }

    /**
     * @param handlingType a value produced by {@link #getHandlingType(boolean, boolean)}.
     * @return true if the lower-case flag is set in {@code handlingType}.
     */
    public static boolean shouldBeLowerCased(final int handlingType) {
        return (handlingType & SHOULD_BE_LOWER_CASED) != 0;
    }

    /**
     * @param handlingType a value produced by {@link #getHandlingType(boolean, boolean)}.
     * @return true if the OOV flag is set in {@code handlingType}.
     */
    public static boolean shouldBeHandledAsOov(final int handlingType) {
        return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0;
    }
}
public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() {
@Override
public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
@ -47,6 +75,12 @@ public interface DistracterFilter {
return false;
}
/** The empty filter reports that no special handling is required, whatever the arguments. */
@Override
public int getWordHandlingType(
        final PrevWordsInfo prevWordsInfo,
        final String testedWord,
        final Locale locale) {
    final int noSpecialHandling = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS;
    return noSpecialHandling;
}
@Override
public void close() {
}

View file

@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
private static final boolean DEBUG = false;
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024;
private final Context mContext;
@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
* Create a DistracterFilter instance.
*
* @param context the context.
* @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for
* checking distracters.
*/
public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context,
final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) {
public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
mContext = context;
mLocaleToSubtypeCache = new ConcurrentHashMap<>();
mLocaleToKeyboardCache = new ConcurrentHashMap<>();
mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache;
mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
}
@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
public void close() {
mLocaleToSubtypeCache.clear();
mLocaleToKeyboardCache.clear();
mDistractersCache.evictAll();
mDictionaryFacilitatorLruCache.evictAll();
// Don't clear mDistractersCache.
}
@Override
@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
mDistractersCache.put(cacheKey, Boolean.TRUE);
return true;
}
final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord,
false /* ignoreCase */);
if (isValidWord) {
final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */);
if (Word) {
// Valid word is not a distractor.
if (DEBUG) {
Log.d(TAG, "isDistracter: false (valid word)");
@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
}
return false;
}
/**
 * Decides whether {@code testedWord} should be replaced by its lower-cased form.
 *
 * <p>Returns false when the word is valid as typed or is already lower case. Otherwise
 * returns true when the lower-cased form is a valid word, or when the word has only its
 * first letter capitalized and {@code prevWordsInfo} is not valid.
 *
 * @param prevWordsInfo the information of previous words.
 * @param testedWord the word to examine.
 * @param locale the locale used to fetch the dictionaries and to lower-case the word.
 * @return true if the word should be lower-cased.
 */
private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord,
        final Locale locale) {
    final DictionaryFacilitator facilitatorForLocale =
            mDictionaryFacilitatorLruCache.get(locale);
    final boolean isValidAsTyped =
            facilitatorForLocale.isValidWord(testedWord, false /* ignoreCase */);
    if (isValidAsTyped) {
        return false;
    }
    final String lowerCasedWord = testedWord.toLowerCase(locale);
    if (lowerCasedWord.equals(testedWord)) {
        // Nothing to lower-case.
        return false;
    }
    if (facilitatorForLocale.isValidWord(lowerCasedWord, false /* ignoreCase */)) {
        return true;
    }
    // TODO: Check beginning-of-sentence.
    final boolean isFirstLetterCapitalized =
            StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST;
    return isFirstLetterCapitalized && !prevWordsInfo.isValid();
}
/**
 * Computes the handling type of {@code testedWord}: whether it should be lower-cased and
 * whether the (possibly lower-cased) word should be handled as an OOV word.
 *
 * @param prevWordsInfo the information of previous words.
 * @param testedWord the word to examine; when null, no special handling is reported.
 * @param locale the locale of the word; when null, no special handling is reported.
 * @return flags readable through {@link HandlingType}.
 */
@Override
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
        final Locale locale) {
    // TODO: Use this method for user history dictionary.
    if (testedWord != null && locale != null) {
        final boolean lowerCaseNeeded = shouldBeLowerCased(prevWordsInfo, testedWord, locale);
        final String wordToLookUp;
        if (lowerCaseNeeded) {
            wordToLookUp = testedWord.toLowerCase(locale);
        } else {
            wordToLookUp = testedWord;
        }
        final DictionaryFacilitator facilitatorForLocale =
                mDictionaryFacilitatorLruCache.get(locale);
        final boolean isInDictionary =
                facilitatorForLocale.isValidWord(wordToLookUp, false /* ignoreCase */);
        return HandlingType.getHandlingType(lowerCaseNeeded, !isInDictionary);
    }
    return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */);
}
}

View file

@ -47,6 +47,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter
}
}
/** Delegates to {@code mDistracterFilter} and returns its result unchanged. */
@Override
public int getWordHandlingType(
        final PrevWordsInfo prevWordsInfo,
        final String testedWord,
        final Locale locale) {
    final int delegatedHandlingType =
            mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale);
    return delegatedHandlingType;
}
@Override
public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
// Do nothing.

View file

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
import java.util.ArrayList;
import java.util.List;
@ -81,8 +82,7 @@ public final class LanguageModelParam {
// Process a list of words and return a list of {@link LanguageModelParam} objects.
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
final List<String> tokens, final int timestamp,
final DictionaryFacilitator dictionaryFacilitator,
final SpacingAndPunctuations spacingAndPunctuations,
final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
final DistracterFilter distracterFilter) {
final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
final int N = tokens.size();
@ -111,8 +111,7 @@ public final class LanguageModelParam {
}
final LanguageModelParam languageModelParam =
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
prevWordsInfo, tempWord, timestamp, dictionaryFacilitator,
distracterFilter);
prevWordsInfo, tempWord, timestamp, locale, distracterFilter);
if (languageModelParam == null) {
continue;
}
@ -125,47 +124,25 @@ public final class LanguageModelParam {
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
final DictionaryFacilitator dictionaryFacilitator,
final DistracterFilter distracterFilter) {
final Locale locale = dictionaryFacilitator.getLocale();
final Locale locale, final DistracterFilter distracterFilter) {
if (locale == null) {
return null;
}
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
true /* isValidWord */, locale, distracterFilter);
final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo,
targetWord, locale);
final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
targetWord.toLowerCase(locale) : targetWord;
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) {
// The word is a distracter.
return null;
}
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
// Add the lower-cased word.
return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
timestamp, true /* isValidWord */, locale, distracterFilter);
}
// Treat the word as an OOV word.
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
false /* isValidWord */, locale, distracterFilter);
return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp,
!HandlingType.shouldBeHandledAsOov(wordHandlingType));
}
private static LanguageModelParam createAndGetLanguageModelParamOfWord(
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
final boolean isValidWord, final Locale locale,
final DistracterFilter distracterFilter) {
final String word;
if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
&& !prevWordsInfo.isValid() && !isValidWord) {
word = targetWord.toLowerCase(locale);
} else {
word = targetWord;
}
// Check whether the word is a distracter to words in the dictionaries.
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) {
if (DEBUG) {
Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word.");
}
return null;
}
final PrevWordsInfo prevWordsInfo, final String word, final int timestamp,
final boolean isValidWord) {
final int unigramProbability = isValidWord ?
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
if (!prevWordsInfo.isValid()) {

View file

@ -14,7 +14,7 @@
* limitations under the License.
*/
package com.android.inputmethod.latin;
package com.android.inputmethod.latin.utils;
import java.util.ArrayList;
import java.util.Locale;
@ -24,24 +24,22 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.RichInputMethodManager;
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
/**
* Unit test for DistracterFilter
*/
@LargeTest
public class DistracterFilterTest extends AndroidTestCase {
private DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache;
private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter;
@Override
protected void setUp() throws Exception {
super.setUp();
final Context context = getContext();
mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
2 /* maxSize */, "" /* dictionaryNamePrefix */);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
mDictionaryFacilitatorLruCache);
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
RichInputMethodManager.init(context);
final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
@ -56,7 +54,7 @@ public class DistracterFilterTest extends AndroidTestCase {
@Override
protected void tearDown() {
mDictionaryFacilitatorLruCache.evictAll();
mDistracterFilter.close();
}
public void testIsDistractorToWordsInDictionaries() {
@ -203,4 +201,25 @@ public class DistracterFilterTest extends AndroidTestCase {
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr));
}
/**
 * Checks the flags returned by getWordHandlingType() for three en_US inputs, each tested
 * with empty previous-words information.
 */
public void testGetWordHandlingType() {
    final PrevWordsInfo emptyPrevWordsInfo = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
    final Locale enUs = new Locale("en", "US");

    // "this": expected to need neither lower-casing nor OOV handling.
    final int typeOfLowerCaseWord =
            mDistracterFilter.getWordHandlingType(emptyPrevWordsInfo, "this", enUs);
    assertFalse(HandlingType.shouldBeLowerCased(typeOfLowerCaseWord));
    assertFalse(HandlingType.shouldBeHandledAsOov(typeOfLowerCaseWord));

    // "This": expected to be lower-cased and not handled as OOV.
    final int typeOfCapitalizedWord =
            mDistracterFilter.getWordHandlingType(emptyPrevWordsInfo, "This", enUs);
    assertTrue(HandlingType.shouldBeLowerCased(typeOfCapitalizedWord));
    assertFalse(HandlingType.shouldBeHandledAsOov(typeOfCapitalizedWord));

    // "thibk": expected to keep its case and be handled as OOV.
    final int typeOfUnknownWord =
            mDistracterFilter.getWordHandlingType(emptyPrevWordsInfo, "thibk", enUs);
    assertFalse(HandlingType.shouldBeLowerCased(typeOfUnknownWord));
    assertTrue(HandlingType.shouldBeHandledAsOov(typeOfUnknownWord));
}
}