From d8430811935dd054f7aee87ceba532c0b30fb60e Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 10 Sep 2014 11:25:34 +0900 Subject: [PATCH] Move case and OOV detection logic into distracter filter. Bug: 16547557 Change-Id: I8502585976deb5e93fff3b1e0266654b8a927bda --- .../latin/DictionaryFacilitator.java | 18 ++----- .../latin/utils/DistracterFilter.java | 34 ++++++++++++ ...terCheckingExactMatchesAndSuggestions.java | 54 +++++++++++++++---- ...istracterFilterCheckingIsInDictionary.java | 6 +++ .../latin/utils/LanguageModelParam.java | 53 ++++++------------ .../{ => utils}/DistracterFilterTest.java | 35 +++++++++--- 6 files changed, 130 insertions(+), 70 deletions(-) rename tests/src/com/android/inputmethod/latin/{ => utils}/DistracterFilterTest.java (87%) diff --git a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java index fde94da93..47aaeadcc 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java +++ b/java/src/com/android/inputmethod/latin/DictionaryFacilitator.java @@ -60,7 +60,6 @@ public class DictionaryFacilitator { // HACK: This threshold is being used when adding a capitalized entry in the User History // dictionary. private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140; - private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3; private DictionaryGroup mDictionaryGroup = new DictionaryGroup(); private boolean mIsUserDictEnabled = false; @@ -68,7 +67,6 @@ public class DictionaryFacilitator { // To synchronize assigning mDictionaryGroup to ensure closing dictionaries. private final Object mLock = new Object(); private final DistracterFilter mDistracterFilter; - private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization; private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS = new String[] { @@ -176,14 +174,10 @@ public class DictionaryFacilitator { public DictionaryFacilitator() { mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER; - mFacilitatorCacheForPersonalization = null; } public DictionaryFacilitator(final Context context) { - mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context, - MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */); - mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context, - mFacilitatorCacheForPersonalization); + mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context); } public void updateEnabledSubtypes(final List enabledSubtypes) { @@ -358,9 +352,6 @@ public class DictionaryFacilitator { for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) { dictionaryGroup.closeDict(dictType); } - if (mFacilitatorCacheForPersonalization != null) { - mFacilitatorCacheForPersonalization.evictAll(); - } mDistracterFilter.close(); } @@ -609,14 +600,11 @@ public class DictionaryFacilitator { } // TODO: Get locale from personalizationDataChunk.mDetectedLanguage. final Locale dataChunkLocale = getLocale(); - final DictionaryFacilitator dictionaryFacilitatorForLocale = - mFacilitatorCacheForPersonalization.get(dataChunkLocale); final ArrayList languageModelParams = LanguageModelParam.createLanguageModelParamsFrom( personalizationDataChunk.mTokens, - personalizationDataChunk.mTimestampInSeconds, - dictionaryFacilitatorForLocale, spacingAndPunctuations, - new DistracterFilterCheckingIsInDictionary( + personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations, + dataChunkLocale, new DistracterFilterCheckingIsInDictionary( mDistracterFilter, personalizationDict)); if (languageModelParams == null || languageModelParams.isEmpty()) { if (callback != null) { diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java index 787e4a59d..94c62429e 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilter.java @@ -36,10 +36,38 @@ public interface DistracterFilter { public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo, final String testedWord, final Locale locale); + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale); + public void updateEnabledSubtypes(final List enabledSubtypes); public void close(); + public static final class HandlingType { + private final static int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0; + private final static int SHOULD_BE_LOWER_CASED = 0x1; + private final static int SHOULD_BE_HANDLED_AS_OOV = 0x2; + + public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) { + int wordHandlingType = HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; + if (shouldBeLowerCased) { + wordHandlingType |= HandlingType.SHOULD_BE_LOWER_CASED; + } + if (isOov) { + wordHandlingType |= HandlingType.SHOULD_BE_HANDLED_AS_OOV; + } + return wordHandlingType; + } + + public static boolean shouldBeLowerCased(final int handlingType) { + return (handlingType & SHOULD_BE_LOWER_CASED) != 0; + } + + public static boolean shouldBeHandledAsOov(final int handlingType) { + return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0; + } + }; + public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() { @Override public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo, @@ -47,6 +75,12 @@ public interface DistracterFilter { return false; } + @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, + final String testedWord, final Locale locale) { + return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS; + } + @Override public void close() { } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java index e10571e4a..1db525502 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingExactMatchesAndSuggestions.java @@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName(); private static final boolean DEBUG = false; + private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3; private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024; private final Context mContext; @@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr * Create a DistracterFilter instance. * * @param context the context. - * @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for - * checking distracters. */ - public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context, - final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) { + public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) { mContext = context; mLocaleToSubtypeCache = new ConcurrentHashMap<>(); mLocaleToKeyboardCache = new ConcurrentHashMap<>(); - mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache; + mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context, + MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */); mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE); } @@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr public void close() { mLocaleToSubtypeCache.clear(); mLocaleToKeyboardCache.clear(); - mDistractersCache.evictAll(); + mDictionaryFacilitatorLruCache.evictAll(); + // Don't clear mDistractersCache. } @Override @@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr mDistractersCache.put(cacheKey, Boolean.TRUE); return true; } - final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord, - false /* ignoreCase */); - if (isValidWord) { + final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */); + if (Word) { // Valid word is not a distractor. if (DEBUG) { Log.d(TAG, "isDistracter: false (valid word)"); @@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr } return false; } + + private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + final DictionaryFacilitator dictionaryFacilitator = + mDictionaryFacilitatorLruCache.get(locale); + if (dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */)) { + return false; + } + final String lowerCaseTargetWord = testedWord.toLowerCase(locale); + if (testedWord.equals(lowerCaseTargetWord)) { + return false; + } + if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { + return true; + } + if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST + && !prevWordsInfo.isValid()) { + // TODO: Check beginning-of-sentence. + return true; + } + return false; + } + + @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + // TODO: Use this method for user history dictionary. + if (testedWord == null|| locale == null) { + return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */); + } + final boolean shouldBeLowerCased = shouldBeLowerCased(prevWordsInfo, testedWord, locale); + final String caseModifiedWord = + shouldBeLowerCased ? testedWord.toLowerCase(locale) : testedWord; + final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidWord( + caseModifiedWord, false /* ignoreCase */); + return HandlingType.getHandlingType(shouldBeLowerCased, isOov); + } } diff --git a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java index 4ad4ba784..349236f18 100644 --- a/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java +++ b/java/src/com/android/inputmethod/latin/utils/DistracterFilterCheckingIsInDictionary.java @@ -47,6 +47,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter } } + @Override + public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord, + final Locale locale) { + return mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale); + } + @Override public void updateEnabledSubtypes(List enabledSubtypes) { // Do nothing. diff --git a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java index fbce3f2fd..05d124764 100644 --- a/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java +++ b/java/src/com/android/inputmethod/latin/utils/LanguageModelParam.java @@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.DictionaryFacilitator; import com.android.inputmethod.latin.PrevWordsInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; import java.util.ArrayList; import java.util.List; @@ -81,8 +82,7 @@ public final class LanguageModelParam { // Process a list of words and return a list of {@link LanguageModelParam} objects. public static ArrayList createLanguageModelParamsFrom( final List tokens, final int timestamp, - final DictionaryFacilitator dictionaryFacilitator, - final SpacingAndPunctuations spacingAndPunctuations, + final SpacingAndPunctuations spacingAndPunctuations, final Locale locale, final DistracterFilter distracterFilter) { final ArrayList languageModelParams = new ArrayList<>(); final int N = tokens.size(); @@ -111,8 +111,7 @@ public final class LanguageModelParam { } final LanguageModelParam languageModelParam = detectWhetherVaildWordOrNotAndGetLanguageModelParam( - prevWordsInfo, tempWord, timestamp, dictionaryFacilitator, - distracterFilter); + prevWordsInfo, tempWord, timestamp, locale, distracterFilter); if (languageModelParam == null) { continue; } @@ -125,47 +124,25 @@ public final class LanguageModelParam { private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam( final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, - final DictionaryFacilitator dictionaryFacilitator, - final DistracterFilter distracterFilter) { - final Locale locale = dictionaryFacilitator.getLocale(); + final Locale locale, final DistracterFilter distracterFilter) { if (locale == null) { return null; } - if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { - return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp, - true /* isValidWord */, locale, distracterFilter); + final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo, + targetWord, locale); + final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ? + targetWord.toLowerCase(locale) : targetWord; + if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) { + // The word is a distracter. + return null; } - - final String lowerCaseTargetWord = targetWord.toLowerCase(locale); - if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) { - // Add the lower-cased word. - return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord, - timestamp, true /* isValidWord */, locale, distracterFilter); - } - - // Treat the word as an OOV word. - return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp, - false /* isValidWord */, locale, distracterFilter); + return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp, + !HandlingType.shouldBeHandledAsOov(wordHandlingType)); } private static LanguageModelParam createAndGetLanguageModelParamOfWord( - final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp, - final boolean isValidWord, final Locale locale, - final DistracterFilter distracterFilter) { - final String word; - if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST - && !prevWordsInfo.isValid() && !isValidWord) { - word = targetWord.toLowerCase(locale); - } else { - word = targetWord; - } - // Check whether the word is a distracter to words in the dictionaries. - if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) { - if (DEBUG) { - Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word."); - } - return null; - } + final PrevWordsInfo prevWordsInfo, final String word, final int timestamp, + final boolean isValidWord) { final int unigramProbability = isValidWord ? UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD; if (!prevWordsInfo.isValid()) { diff --git a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java similarity index 87% rename from tests/src/com/android/inputmethod/latin/DistracterFilterTest.java rename to tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java index af22fb8b9..5fbd36ac7 100644 --- a/tests/src/com/android/inputmethod/latin/DistracterFilterTest.java +++ b/tests/src/com/android/inputmethod/latin/utils/DistracterFilterTest.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.android.inputmethod.latin; +package com.android.inputmethod.latin.utils; import java.util.ArrayList; import java.util.Locale; @@ -24,24 +24,22 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.view.inputmethod.InputMethodSubtype; -import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions; +import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.RichInputMethodManager; +import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType; /** * Unit test for DistracterFilter */ @LargeTest public class DistracterFilterTest extends AndroidTestCase { - private DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache; private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter; @Override protected void setUp() throws Exception { super.setUp(); final Context context = getContext(); - mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context, - 2 /* maxSize */, "" /* dictionaryNamePrefix */); - mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context, - mDictionaryFacilitatorLruCache); + mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context); RichInputMethodManager.init(context); final RichInputMethodManager richImm = RichInputMethodManager.getInstance(); final ArrayList subtypes = new ArrayList<>(); @@ -56,7 +54,7 @@ public class DistracterFilterTest extends AndroidTestCase { @Override protected void tearDown() { - mDictionaryFacilitatorLruCache.evictAll(); + mDistracterFilter.close(); } public void testIsDistractorToWordsInDictionaries() { @@ -203,4 +201,25 @@ public class DistracterFilterTest extends AndroidTestCase { assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries( EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr)); } + + public void testGetWordHandlingType() { + final Locale localeEnUs = new Locale("en", "US"); + final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + int handlingType = 0; + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "this", localeEnUs); + assertFalse(HandlingType.shouldBeLowerCased(handlingType)); + assertFalse(HandlingType.shouldBeHandledAsOov(handlingType)); + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "This", localeEnUs); + assertTrue(HandlingType.shouldBeLowerCased(handlingType)); + assertFalse(HandlingType.shouldBeHandledAsOov(handlingType)); + + handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO, + "thibk", localeEnUs); + assertFalse(HandlingType.shouldBeLowerCased(handlingType)); + assertTrue(HandlingType.shouldBeHandledAsOov(handlingType)); + } }