Merge "Implement the distracter filter"

This commit is contained in:
Xiaojun Bi 2014-05-13 23:08:40 +00:00 committed by Android (Google) Code Review
commit 25ec32f635
4 changed files with 170 additions and 17 deletions

View file

@ -540,18 +540,6 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
refreshPersonalizationDictionarySession(); refreshPersonalizationDictionarySession();
} }
/**
 * Builds a DistracterFilter from the Suggest instance held by the input logic and the
 * keyboard of the main keyboard view; the keyboard is null when there is no main keyboard
 * view.
 */
private DistracterFilter createDistracterFilter() {
    // TODO: Create Keyboard when mainKeyboardView is null.
    // TODO: Figure out the most reasonable keyboard for the filter. Refer to the
    // spellchecker's logic.
    final MainKeyboardView view = mKeyboardSwitcher.getMainKeyboardView();
    Keyboard currentKeyboard = null;
    if (view != null) {
        currentKeyboard = view.getKeyboard();
    }
    return new DistracterFilter(mInputLogic.mSuggest, currentKeyboard);
}
private void refreshPersonalizationDictionarySession() { private void refreshPersonalizationDictionarySession() {
final DictionaryFacilitatorForSuggest dictionaryFacilitator = final DictionaryFacilitatorForSuggest dictionaryFacilitator =
mInputLogic.mSuggest.mDictionaryFacilitator; mInputLogic.mSuggest.mDictionaryFacilitator;
@ -1755,6 +1743,11 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
mInputLogic.mSuggest.mDictionaryFacilitator.clearPersonalizationDictionary(); mInputLogic.mSuggest.mDictionaryFacilitator.clearPersonalizationDictionary();
} }
/**
 * Test hook: builds a DistracterFilter from this IME's Suggest instance and keyboard
 * switcher by delegating to DistracterFilter.createDistracterFilter(...).
 */
@UsedForTesting
/* package for test */ DistracterFilter createDistracterFilter() {
    final DistracterFilter filter =
            DistracterFilter.createDistracterFilter(mInputLogic.mSuggest, mKeyboardSwitcher);
    return filter;
}
public void dumpDictionaryForDebug(final String dictName) { public void dumpDictionaryForDebug(final String dictName) {
final DictionaryFacilitatorForSuggest dictionaryFacilitator = final DictionaryFacilitatorForSuggest dictionaryFacilitator =
mInputLogic.mSuggest.mDictionaryFacilitator; mInputLogic.mSuggest.mDictionaryFacilitator;

View file

@ -17,21 +17,35 @@
package com.android.inputmethod.latin.utils; package com.android.inputmethod.latin.utils;
import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.KeyboardSwitcher;
import com.android.inputmethod.keyboard.MainKeyboardView;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Suggest; import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
/** /**
* This class is used to prevent distracters/misspellings being added to personalization * This class is used to prevent distracters being added to personalization
* or user history dictionaries * or user history dictionaries
*/ */
public class DistracterFilter { public class DistracterFilter {
private final Suggest mSuggest; private final Suggest mSuggest;
private final Keyboard mKeyboard; private final Keyboard mKeyboard;
// If the normalized score of the top suggestion exceeds this value, the tested word (e.g.,
// an OOV, a misspelling, or an in-vocabulary word) is considered a distracter to
// words in the dictionary. The greater the threshold is, the less likely the tested word is
// to be treated as a distracter, which means the tested word is more likely to be added to
// the dictionary.
private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
/** /**
* Create a DistracterFilter instance. * Create a DistracterFilter instance.
* *
* @param suggest an instance of Suggest which will be used to obtain a list of suggestions * @param suggest an instance of Suggest which will be used to obtain a list of suggestions
* for a potential distracter/misspelling * for a potential distracter
* @param keyboard the keyboard that is currently being used. This information is needed * @param keyboard the keyboard that is currently being used. This information is needed
* when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions. * when calling mSuggest.getSuggestedWords(...) to obtain a list of suggestions.
*/ */
@ -40,9 +54,79 @@ public class DistracterFilter {
mKeyboard = keyboard; mKeyboard = keyboard;
} }
public boolean isDistracterToWordsInDictionaries(final String prevWord, public static DistracterFilter createDistracterFilter(final Suggest suggest,
final String targetWord) { final KeyboardSwitcher keyboardSwitcher) {
// TODO: to be implemented final MainKeyboardView mainKeyboardView = keyboardSwitcher.getMainKeyboardView();
// TODO: Create Keyboard when mainKeyboardView is null.
// TODO: Figure out the most reasonable keyboard for the filter. Refer to the
// spellchecker's logic.
final Keyboard keyboard = (mainKeyboardView != null) ?
mainKeyboardView.getKeyboard() : null;
final DistracterFilter distracterFilter = new DistracterFilter(suggest, keyboard);
return distracterFilter;
}
/**
 * Checks whether a suggestion scores high enough against the considered word to count as
 * a strong distracter.
 *
 * @param suggestion the suggestion to evaluate; a null suggestion never exceeds the threshold.
 * @param consideredWord the word the suggestion is compared with.
 * @param distracterThreshold the value the suggestion's normalized score must strictly exceed.
 * @return true if suggestion is non-null and its normalized score is greater than
 * distracterThreshold, otherwise false.
 */
private static boolean suggestionExceedsDistracterThreshold(
final SuggestedWordInfo suggestion, final String consideredWord,
final float distracterThreshold) {
if (null != suggestion) {
final int suggestionScore = suggestion.mScore;
// The value compared with the threshold is computed by calcNormalizedScore from the
// considered word, the suggested word and the suggestion's raw score.
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
consideredWord, suggestion.mWord, suggestionScore);
if (normalizedScore > distracterThreshold) {
return true;
}
}
return false; return false;
} }
/**
 * Determine whether a word is a distracter to words in dictionaries.
 *
 * The word is handed to the suggestion engine as the composing word. It is reported as a
 * distracter when the first suggestion coming from the decoder has a normalized score
 * above DISTRACTER_WORD_SCORE_THRESHOLD.
 *
 * @param prevWord the previous word, or null if none.
 * @param testedWord the word that will be tested to see whether it is a distracter to words
 * in dictionaries.
 * @return true if testedWord is a distracter, otherwise false. The result is also false
 * when no Suggest or Keyboard is available, when fewer than two suggested words are
 * returned, or when no result is available within Constants.GET_SUGGESTED_WORDS_TIMEOUT.
 */
public boolean isDistracterToWordsInDictionaries(final String prevWord,
        final String testedWord) {
    // The filter can be created with a null keyboard. The suggestion request below needs
    // the keyboard's proximity info, so without a keyboard the word cannot be tested and
    // is reported as "not a distracter" instead of dereferencing null.
    if (mSuggest == null || mKeyboard == null) {
        return false;
    }
    final int[] codePoints = StringUtils.toCodePointArray(testedWord);
    final int[] coordinates = mKeyboard.getCoordinates(codePoints);
    final WordComposer composer = new WordComposer();
    composer.setComposingWord(codePoints, coordinates, prevWord);

    // Scores are computed against the tested word without its trailing single quotes.
    final int trailingSingleQuotesCount = composer.trailingSingleQuotesCount();
    final String consideredWord = trailingSingleQuotesCount > 0 ? testedWord.substring(0,
            testedWord.length() - trailingSingleQuotesCount) : testedWord;

    final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
    final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
        @Override
        public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
            // Always publish a result, so that the caller does not have to rely on the
            // timeout default when the decoder produced no suggestion.
            if (suggestedWords == null || suggestedWords.size() <= 1) {
                holder.set(false);
                return;
            }
            // The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
            // the decoder is at index 1.
            final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
            final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
                    firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
            holder.set(hasStrongDistractor);
        }
    };
    mSuggest.getSuggestedWords(composer, prevWord, mKeyboard.getProximityInfo(),
            true /* blockOffensiveWords */, true /* isCorrectionEnabled */,
            null /* additionalFeaturesOptions */, 0 /* sessionId */,
            SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
    return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
}
} }

View file

@ -129,6 +129,9 @@ public final class LanguageModelParam {
if (locale == null) { if (locale == null) {
return null; return null;
} }
// TODO: Though targetWord is an IV (in-vocabulary) word, we should still apply
// distracterFilter in the following code. If targetWord is a distracter,
// it should be filtered out.
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) { if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp, return createAndGetLanguageModelParamOfWord(prevWord, targetWord, timestamp,
true /* isValidWord */, locale); true /* isValidWord */, locale);

View file

@ -0,0 +1,73 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin;
import android.test.suitebuilder.annotation.LargeTest;
import com.android.inputmethod.latin.utils.DistracterFilter;
/**
 * Unit test for DistracterFilter. Every word is checked without a previous word.
 */
@LargeTest
public class DistracterFilterTest extends InputTestsBase {
    // No previous word is supplied to the filter in these tests.
    private static final String NO_PREV_WORD = null;

    private DistracterFilter mDistracterFilter;

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        mDistracterFilter = mLatinIME.createDistracterFilter();
    }

    public void testIsDistractorToWordsInDictionaries() {
        // "alot" is expected to be a distracter to "a lot".
        assertIsDistracter("alot");
        // "mot" is expected to be a distracter to "not".
        assertIsDistracter("mot");
        // "wierd" is expected to be a distracter to "weird".
        assertIsDistracter("wierd");
        // "hoe" is expected to be a distracter to "how".
        assertIsDistracter("hoe");
        // "nit" is expected to be a distracter to "not".
        assertIsDistracter("nit");
        // "ill" is expected to be a distracter to "I'll".
        assertIsDistracter("ill");

        // "asdfd" is expected not to be a distracter to any word in dictionaries.
        assertIsNotDistracter("asdfd");
        // "thank" is expected not to be a distracter to any other word in dictionaries.
        assertIsNotDistracter("thank");
    }

    // Asserts that the filter flags the word as a distracter.
    private void assertIsDistracter(final String typedWord) {
        assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(NO_PREV_WORD, typedWord));
    }

    // Asserts that the filter does not flag the word as a distracter.
    private void assertIsNotDistracter(final String typedWord) {
        assertFalse(
                mDistracterFilter.isDistracterToWordsInDictionaries(NO_PREV_WORD, typedWord));
    }
}