Merge "Move case and OOV detection logic into distracter filter."
This commit is contained in:
commit
934e1d8087
6 changed files with 130 additions and 70 deletions
|
@ -60,7 +60,6 @@ public class DictionaryFacilitator {
|
|||
// HACK: This threshold is being used when adding a capitalized entry in the User History
|
||||
// dictionary.
|
||||
private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140;
|
||||
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
|
||||
|
||||
private DictionaryGroup mDictionaryGroup = new DictionaryGroup();
|
||||
private boolean mIsUserDictEnabled = false;
|
||||
|
@ -68,7 +67,6 @@ public class DictionaryFacilitator {
|
|||
// To synchronize assigning mDictionaryGroup to ensure closing dictionaries.
|
||||
private final Object mLock = new Object();
|
||||
private final DistracterFilter mDistracterFilter;
|
||||
private final DictionaryFacilitatorLruCache mFacilitatorCacheForPersonalization;
|
||||
|
||||
private static final String[] DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS =
|
||||
new String[] {
|
||||
|
@ -176,14 +174,10 @@ public class DictionaryFacilitator {
|
|||
|
||||
public DictionaryFacilitator() {
|
||||
mDistracterFilter = DistracterFilter.EMPTY_DISTRACTER_FILTER;
|
||||
mFacilitatorCacheForPersonalization = null;
|
||||
}
|
||||
|
||||
public DictionaryFacilitator(final Context context) {
|
||||
mFacilitatorCacheForPersonalization = new DictionaryFacilitatorLruCache(context,
|
||||
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
|
||||
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
|
||||
mFacilitatorCacheForPersonalization);
|
||||
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
|
||||
}
|
||||
|
||||
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
|
||||
|
@ -358,9 +352,6 @@ public class DictionaryFacilitator {
|
|||
for (final String dictType : DICT_TYPES_ORDERED_TO_GET_SUGGESTIONS) {
|
||||
dictionaryGroup.closeDict(dictType);
|
||||
}
|
||||
if (mFacilitatorCacheForPersonalization != null) {
|
||||
mFacilitatorCacheForPersonalization.evictAll();
|
||||
}
|
||||
mDistracterFilter.close();
|
||||
}
|
||||
|
||||
|
@ -609,14 +600,11 @@ public class DictionaryFacilitator {
|
|||
}
|
||||
// TODO: Get locale from personalizationDataChunk.mDetectedLanguage.
|
||||
final Locale dataChunkLocale = getLocale();
|
||||
final DictionaryFacilitator dictionaryFacilitatorForLocale =
|
||||
mFacilitatorCacheForPersonalization.get(dataChunkLocale);
|
||||
final ArrayList<LanguageModelParam> languageModelParams =
|
||||
LanguageModelParam.createLanguageModelParamsFrom(
|
||||
personalizationDataChunk.mTokens,
|
||||
personalizationDataChunk.mTimestampInSeconds,
|
||||
dictionaryFacilitatorForLocale, spacingAndPunctuations,
|
||||
new DistracterFilterCheckingIsInDictionary(
|
||||
personalizationDataChunk.mTimestampInSeconds, spacingAndPunctuations,
|
||||
dataChunkLocale, new DistracterFilterCheckingIsInDictionary(
|
||||
mDistracterFilter, personalizationDict));
|
||||
if (languageModelParams == null || languageModelParams.isEmpty()) {
|
||||
if (callback != null) {
|
||||
|
|
|
@ -36,10 +36,38 @@ public interface DistracterFilter {
|
|||
public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
|
||||
final String testedWord, final Locale locale);
|
||||
|
||||
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
|
||||
final Locale locale);
|
||||
|
||||
public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes);
|
||||
|
||||
public void close();
|
||||
|
||||
/**
 * Bit flags describing how a tested word should be handled, with helpers to build and query
 * the combined flag value.
 *
 * Declared inside the {@code DistracterFilter} interface, where member classes are implicitly
 * static, so no explicit {@code static} modifier is needed.
 */
public final class HandlingType {
    /** No flag set. */
    private static final int REQUIRE_NO_SPECIAL_HANDLINGS = 0x0;
    /** Flag bit: the word should be converted to lower case. */
    private static final int SHOULD_BE_LOWER_CASED = 0x1;
    /** Flag bit: the word should be handled as an out-of-vocabulary (OOV) word. */
    private static final int SHOULD_BE_HANDLED_AS_OOV = 0x2;

    /**
     * Builds the combined handling type value from the two individual decisions.
     *
     * @param shouldBeLowerCased whether the lower-case flag should be set.
     * @param isOov whether the OOV flag should be set.
     * @return the bitwise OR of the requested flags, or the "no special handling" value when
     *         both arguments are false.
     */
    public static int getHandlingType(final boolean shouldBeLowerCased, final boolean isOov) {
        int wordHandlingType = REQUIRE_NO_SPECIAL_HANDLINGS;
        if (shouldBeLowerCased) {
            wordHandlingType |= SHOULD_BE_LOWER_CASED;
        }
        if (isOov) {
            wordHandlingType |= SHOULD_BE_HANDLED_AS_OOV;
        }
        return wordHandlingType;
    }

    /**
     * @param handlingType a value produced by {@link #getHandlingType(boolean, boolean)}.
     * @return true if the lower-case flag is set in {@code handlingType}.
     */
    public static boolean shouldBeLowerCased(final int handlingType) {
        return (handlingType & SHOULD_BE_LOWER_CASED) != 0;
    }

    /**
     * @param handlingType a value produced by {@link #getHandlingType(boolean, boolean)}.
     * @return true if the OOV flag is set in {@code handlingType}.
     */
    public static boolean shouldBeHandledAsOov(final int handlingType) {
        return (handlingType & SHOULD_BE_HANDLED_AS_OOV) != 0;
    }
}
|
||||
|
||||
public static final DistracterFilter EMPTY_DISTRACTER_FILTER = new DistracterFilter() {
|
||||
@Override
|
||||
public boolean isDistracterToWordsInDictionaries(PrevWordsInfo prevWordsInfo,
|
||||
|
@ -47,6 +75,12 @@ public interface DistracterFilter {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo,
|
||||
final String testedWord, final Locale locale) {
|
||||
return HandlingType.REQUIRE_NO_SPECIAL_HANDLINGS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
|
|||
DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
private static final int MAX_DICTIONARY_FACILITATOR_CACHE_SIZE = 3;
|
||||
private static final int MAX_DISTRACTERS_CACHE_SIZE = 1024;
|
||||
|
||||
private final Context mContext;
|
||||
|
@ -73,15 +74,13 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
|
|||
* Create a DistracterFilter instance.
|
||||
*
|
||||
* @param context the context.
|
||||
* @param dictionaryFacilitatorLruCache the cache of dictionaryFacilitators that are used for
|
||||
* checking distracters.
|
||||
*/
|
||||
public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context,
|
||||
final DictionaryFacilitatorLruCache dictionaryFacilitatorLruCache) {
|
||||
public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
|
||||
mContext = context;
|
||||
mLocaleToSubtypeCache = new ConcurrentHashMap<>();
|
||||
mLocaleToKeyboardCache = new ConcurrentHashMap<>();
|
||||
mDictionaryFacilitatorLruCache = dictionaryFacilitatorLruCache;
|
||||
mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
|
||||
MAX_DICTIONARY_FACILITATOR_CACHE_SIZE, "" /* dictionaryNamePrefix */);
|
||||
mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
|
||||
}
|
||||
|
||||
|
@ -89,7 +88,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
|
|||
public void close() {
|
||||
mLocaleToSubtypeCache.clear();
|
||||
mLocaleToKeyboardCache.clear();
|
||||
mDistractersCache.evictAll();
|
||||
mDictionaryFacilitatorLruCache.evictAll();
|
||||
// Don't clear mDistractersCache.
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -194,9 +194,8 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
|
|||
mDistractersCache.put(cacheKey, Boolean.TRUE);
|
||||
return true;
|
||||
}
|
||||
final boolean isValidWord = dictionaryFacilitator.isValidWord(testedWord,
|
||||
false /* ignoreCase */);
|
||||
if (isValidWord) {
|
||||
final boolean Word = dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */);
|
||||
if (Word) {
|
||||
// Valid word is not a distractor.
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "isDistracter: false (valid word)");
|
||||
|
@ -283,4 +282,41 @@ public class DistracterFilterCheckingExactMatchesAndSuggestions implements Distr
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean shouldBeLowerCased(final PrevWordsInfo prevWordsInfo, final String testedWord,
|
||||
final Locale locale) {
|
||||
final DictionaryFacilitator dictionaryFacilitator =
|
||||
mDictionaryFacilitatorLruCache.get(locale);
|
||||
if (dictionaryFacilitator.isValidWord(testedWord, false /* ignoreCase */)) {
|
||||
return false;
|
||||
}
|
||||
final String lowerCaseTargetWord = testedWord.toLowerCase(locale);
|
||||
if (testedWord.equals(lowerCaseTargetWord)) {
|
||||
return false;
|
||||
}
|
||||
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
|
||||
return true;
|
||||
}
|
||||
if (StringUtils.getCapitalizationType(testedWord) == StringUtils.CAPITALIZE_FIRST
|
||||
&& !prevWordsInfo.isValid()) {
|
||||
// TODO: Check beginning-of-sentence.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
|
||||
final Locale locale) {
|
||||
// TODO: Use this method for user history dictionary.
|
||||
if (testedWord == null|| locale == null) {
|
||||
return HandlingType.getHandlingType(false /* shouldBeLowerCased */, false /* isOov */);
|
||||
}
|
||||
final boolean shouldBeLowerCased = shouldBeLowerCased(prevWordsInfo, testedWord, locale);
|
||||
final String caseModifiedWord =
|
||||
shouldBeLowerCased ? testedWord.toLowerCase(locale) : testedWord;
|
||||
final boolean isOov = !mDictionaryFacilitatorLruCache.get(locale).isValidWord(
|
||||
caseModifiedWord, false /* ignoreCase */);
|
||||
return HandlingType.getHandlingType(shouldBeLowerCased, isOov);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,12 @@ public class DistracterFilterCheckingIsInDictionary implements DistracterFilter
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getWordHandlingType(final PrevWordsInfo prevWordsInfo, final String testedWord,
|
||||
final Locale locale) {
|
||||
return mDistracterFilter.getWordHandlingType(prevWordsInfo, testedWord, locale);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateEnabledSubtypes(List<InputMethodSubtype> enabledSubtypes) {
|
||||
// Do nothing.
|
||||
|
|
|
@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
|
|||
import com.android.inputmethod.latin.DictionaryFacilitator;
|
||||
import com.android.inputmethod.latin.PrevWordsInfo;
|
||||
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
||||
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -81,8 +82,7 @@ public final class LanguageModelParam {
|
|||
// Process a list of words and return a list of {@link LanguageModelParam} objects.
|
||||
public static ArrayList<LanguageModelParam> createLanguageModelParamsFrom(
|
||||
final List<String> tokens, final int timestamp,
|
||||
final DictionaryFacilitator dictionaryFacilitator,
|
||||
final SpacingAndPunctuations spacingAndPunctuations,
|
||||
final SpacingAndPunctuations spacingAndPunctuations, final Locale locale,
|
||||
final DistracterFilter distracterFilter) {
|
||||
final ArrayList<LanguageModelParam> languageModelParams = new ArrayList<>();
|
||||
final int N = tokens.size();
|
||||
|
@ -111,8 +111,7 @@ public final class LanguageModelParam {
|
|||
}
|
||||
final LanguageModelParam languageModelParam =
|
||||
detectWhetherVaildWordOrNotAndGetLanguageModelParam(
|
||||
prevWordsInfo, tempWord, timestamp, dictionaryFacilitator,
|
||||
distracterFilter);
|
||||
prevWordsInfo, tempWord, timestamp, locale, distracterFilter);
|
||||
if (languageModelParam == null) {
|
||||
continue;
|
||||
}
|
||||
|
@ -125,47 +124,25 @@ public final class LanguageModelParam {
|
|||
|
||||
private static LanguageModelParam detectWhetherVaildWordOrNotAndGetLanguageModelParam(
|
||||
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
|
||||
final DictionaryFacilitator dictionaryFacilitator,
|
||||
final DistracterFilter distracterFilter) {
|
||||
final Locale locale = dictionaryFacilitator.getLocale();
|
||||
final Locale locale, final DistracterFilter distracterFilter) {
|
||||
if (locale == null) {
|
||||
return null;
|
||||
}
|
||||
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
|
||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
||||
true /* isValidWord */, locale, distracterFilter);
|
||||
final int wordHandlingType = distracterFilter.getWordHandlingType(prevWordsInfo,
|
||||
targetWord, locale);
|
||||
final String word = HandlingType.shouldBeLowerCased(wordHandlingType) ?
|
||||
targetWord.toLowerCase(locale) : targetWord;
|
||||
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) {
|
||||
// The word is a distracter.
|
||||
return null;
|
||||
}
|
||||
|
||||
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
|
||||
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
|
||||
// Add the lower-cased word.
|
||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
|
||||
timestamp, true /* isValidWord */, locale, distracterFilter);
|
||||
}
|
||||
|
||||
// Treat the word as an OOV word.
|
||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
||||
false /* isValidWord */, locale, distracterFilter);
|
||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, word, timestamp,
|
||||
!HandlingType.shouldBeHandledAsOov(wordHandlingType));
|
||||
}
|
||||
|
||||
private static LanguageModelParam createAndGetLanguageModelParamOfWord(
|
||||
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
|
||||
final boolean isValidWord, final Locale locale,
|
||||
final DistracterFilter distracterFilter) {
|
||||
final String word;
|
||||
if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
|
||||
&& !prevWordsInfo.isValid() && !isValidWord) {
|
||||
word = targetWord.toLowerCase(locale);
|
||||
} else {
|
||||
word = targetWord;
|
||||
}
|
||||
// Check whether the word is a distracter to words in the dictionaries.
|
||||
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) {
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word.");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
final PrevWordsInfo prevWordsInfo, final String word, final int timestamp,
|
||||
final boolean isValidWord) {
|
||||
final int unigramProbability = isValidWord ?
|
||||
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
|
||||
if (!prevWordsInfo.isValid()) {
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin;
|
||||
package com.android.inputmethod.latin.utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Locale;
|
||||
|
@ -24,24 +24,22 @@ import android.test.AndroidTestCase;
|
|||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.view.inputmethod.InputMethodSubtype;
|
||||
|
||||
import com.android.inputmethod.latin.utils.DistracterFilterCheckingExactMatchesAndSuggestions;
|
||||
import com.android.inputmethod.latin.PrevWordsInfo;
|
||||
import com.android.inputmethod.latin.RichInputMethodManager;
|
||||
import com.android.inputmethod.latin.utils.DistracterFilter.HandlingType;
|
||||
|
||||
/**
|
||||
* Unit test for DistracterFilter
|
||||
*/
|
||||
@LargeTest
|
||||
public class DistracterFilterTest extends AndroidTestCase {
|
||||
private DictionaryFacilitatorLruCache mDictionaryFacilitatorLruCache;
|
||||
private DistracterFilterCheckingExactMatchesAndSuggestions mDistracterFilter;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
final Context context = getContext();
|
||||
mDictionaryFacilitatorLruCache = new DictionaryFacilitatorLruCache(context,
|
||||
2 /* maxSize */, "" /* dictionaryNamePrefix */);
|
||||
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context,
|
||||
mDictionaryFacilitatorLruCache);
|
||||
mDistracterFilter = new DistracterFilterCheckingExactMatchesAndSuggestions(context);
|
||||
RichInputMethodManager.init(context);
|
||||
final RichInputMethodManager richImm = RichInputMethodManager.getInstance();
|
||||
final ArrayList<InputMethodSubtype> subtypes = new ArrayList<>();
|
||||
|
@ -56,7 +54,7 @@ public class DistracterFilterTest extends AndroidTestCase {
|
|||
|
||||
@Override
|
||||
protected void tearDown() {
|
||||
mDictionaryFacilitatorLruCache.evictAll();
|
||||
mDistracterFilter.close();
|
||||
}
|
||||
|
||||
public void testIsDistractorToWordsInDictionaries() {
|
||||
|
@ -203,4 +201,25 @@ public class DistracterFilterTest extends AndroidTestCase {
|
|||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeFrFr));
|
||||
}
|
||||
|
||||
public void testGetWordHandlingType() {
|
||||
final Locale localeEnUs = new Locale("en", "US");
|
||||
final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
||||
int handlingType = 0;
|
||||
|
||||
handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO,
|
||||
"this", localeEnUs);
|
||||
assertFalse(HandlingType.shouldBeLowerCased(handlingType));
|
||||
assertFalse(HandlingType.shouldBeHandledAsOov(handlingType));
|
||||
|
||||
handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO,
|
||||
"This", localeEnUs);
|
||||
assertTrue(HandlingType.shouldBeLowerCased(handlingType));
|
||||
assertFalse(HandlingType.shouldBeHandledAsOov(handlingType));
|
||||
|
||||
handlingType = mDistracterFilter.getWordHandlingType(EMPTY_PREV_WORDS_INFO,
|
||||
"thibk", localeEnUs);
|
||||
assertFalse(HandlingType.shouldBeLowerCased(handlingType));
|
||||
assertTrue(HandlingType.shouldBeHandledAsOov(handlingType));
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue