Merge "Use whether it's exact match to detect distracters."

main
Keisuke Kuroyanagi 2014-05-26 13:53:19 +00:00 committed by Android (Google) Code Review
commit 0088d1f668
11 changed files with 77 additions and 73 deletions

View File

@ -78,7 +78,7 @@ public final class SuggestionSpanUtils {
break;
}
final SuggestedWordInfo info = suggestedWords.getInfo(i);
if (info.mKind == SuggestedWordInfo.KIND_PREDICTION) {
if (info.isKindOf(SuggestedWordInfo.KIND_PREDICTION)) {
continue;
}
final String word = suggestedWords.getWord(i);

View File

@ -326,13 +326,8 @@ public final class BinaryDictionary extends Dictionary {
// offensive, then we don't output it unless it's also an exact match.
continue;
}
final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
final int score = SuggestedWordInfo.KIND_WHITELIST == kind
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
// TODO: check that all users of the `kind' parameter are ready to accept
// flags too and pass mOutputTypes[j] instead of kind
suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
score, kind, this /* sourceDict */,
mOutputScores[j], mOutputTypes[j], this /* sourceDict */,
mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
mOutputAutoCommitFirstWordConfidence[0]));
}

View File

@ -127,7 +127,7 @@ public final class Suggest {
suggestionResults.first(), suggestionResults.mLocale, isAllUpperCase,
isFirstCharCapitalized, trailingSingleQuotesCount);
firstSuggestion = firstSuggestedWordInfo.mWord;
if (SuggestedWordInfo.KIND_WHITELIST != firstSuggestedWordInfo.mKind) {
if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
whitelistedWord = null;
} else {
whitelistedWord = firstSuggestion;
@ -158,7 +158,7 @@ public final class Suggest {
|| suggestionResults.isEmpty() || wordComposer.hasDigits()
|| wordComposer.isMostlyCaps() || wordComposer.isResumed()
|| !mDictionaryFacilitator.hasInitializedMainDictionary()
|| SuggestedWordInfo.KIND_SHORTCUT == suggestionResults.first().mKind) {
|| suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
// If we don't have a main dictionary, we never want to auto-correct. The reason for
// this is, the user may have a contact whose name happens to match a valid word in
// their language, and it will unexpectedly auto-correct. For example, if the user

View File

@ -225,13 +225,14 @@ public class SuggestedWords {
public static final int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
public static final int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
public static final int KIND_FLAG_EXACT_MATCH = 0x40000000;
public static final int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
public final String mWord;
// The completion info from the application. Null for suggestions that don't come from
// the application (including keyboard-computed ones, so this is almost always null)
public final CompletionInfo mApplicationSpecifiedCompletionInfo;
public final int mScore;
public final int mKind; // one of the KIND_* constants above
public final int mKind; // kind and kind flags
public final int mCodePointCount;
public final Dictionary mSourceDict;
// For auto-commit. This keeps track of the index inside the touch coordinates array
@ -247,7 +248,7 @@ public class SuggestedWords {
* Create a new suggested word info.
* @param word The string to suggest.
* @param score A measure of how likely this suggestion is.
* @param kind The kind of suggestion, as one of the above KIND_* constants.
* @param kind The kind of suggestion, as one of the above KIND_* constants with flags.
* @param sourceDict What instance of Dictionary produced this suggestion.
* @param indexOfTouchPointOfSecondWord See mIndexOfTouchPointOfSecondWord.
* @param autoCommitFirstWordConfidence See mAutoCommitFirstWordConfidence.
@ -282,7 +283,11 @@ public class SuggestedWords {
}
public boolean isEligibleForAutoCommit() {
return (KIND_CORRECTION == mKind && NOT_AN_INDEX != mIndexOfTouchPointOfSecondWord);
return (isKindOf(KIND_CORRECTION) && NOT_AN_INDEX != mIndexOfTouchPointOfSecondWord);
}
/**
 * Checks whether the kind part of this suggestion equals the given kind.
 *
 * mKind holds both the kind and the kind flags, so only the bits selected by
 * KIND_MASK_KIND are compared; flag bits stored in mKind do not affect the result.
 *
 * @param kind the kind to compare against. A value carrying bits outside of
 * KIND_MASK_KIND can never match.
 * @return true if the masked kind of this suggestion equals {@code kind}.
 */
public boolean isKindOf(final int kind) {
return (mKind & KIND_MASK_KIND) == kind;
}
public void setDebugString(final String str) {
@ -339,7 +344,7 @@ public class SuggestedWords {
String typedWord = null;
for (int i = 0; i < mSuggestedWordInfoList.size(); ++i) {
final SuggestedWordInfo info = mSuggestedWordInfoList.get(i);
if (SuggestedWordInfo.KIND_TYPED != info.mKind) {
if (!info.isKindOf(SuggestedWordInfo.KIND_TYPED)) {
newSuggestions.add(info);
} else {
assert(null == typedWord);

View File

@ -270,7 +270,7 @@ public final class InputLogic {
// code path as for other kinds, use commitChosenWord, and do everything normally. We will
// however need to reset the suggestion strip right away, because we know we can't take
// the risk of calling commitCompletion twice because we don't know how the app will react.
if (SuggestedWordInfo.KIND_APP_DEFINED == suggestionInfo.mKind) {
if (suggestionInfo.isKindOf(SuggestedWordInfo.KIND_APP_DEFINED)) {
mSuggestedWords = SuggestedWords.EMPTY;
mSuggestionStripViewAccessor.setNeutralSuggestionStrip();
inputTransaction.requireShiftUpdate(InputTransaction.SHIFT_UPDATE_NOW);

View File

@ -32,12 +32,8 @@ import android.view.inputmethod.InputMethodSubtype;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.KeyboardId;
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
@ -47,6 +43,7 @@ import com.android.inputmethod.latin.WordComposer;
*/
public class DistracterFilterUsingSuggestion implements DistracterFilter {
private static final String TAG = DistracterFilterUsingSuggestion.class.getSimpleName();
private static final boolean DEBUG = false;
private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
@ -54,17 +51,9 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
private final DictionaryFacilitator mDictionaryFacilitator;
private final Suggest mSuggest;
private Keyboard mKeyboard;
private final Object mLock = new Object();
// If the score of the top suggestion exceeds this value, the tested word (e.g.,
// an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
// words in dictionary. The greater the threshold is, the less likely the tested word would
// become a distracter, which means the tested word will be more likely to be added to
// the dictionary.
private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
/**
* Create a DistracterFilter instance.
*
@ -75,7 +64,6 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
mLocaleToSubtypeMap = new HashMap<>();
mLocaleToKeyboardMap = new HashMap<>();
mDictionaryFacilitator = new DictionaryFacilitator();
mSuggest = new Suggest(mDictionaryFacilitator);
mKeyboard = null;
}
@ -109,16 +97,30 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
}
}
private static boolean suggestionExceedsDistracterThreshold(
final SuggestedWordInfo suggestion, final String consideredWord,
final float distracterThreshold) {
if (null != suggestion) {
final int suggestionScore = suggestion.mScore;
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
consideredWord, suggestion.mWord, suggestionScore);
if (normalizedScore > distracterThreshold) {
return true;
/**
 * Decides whether the considered word is a distracter, based on the suggestion results.
 *
 * Suggestions whose word is identical to the considered word are skipped. The decision is
 * made on the first remaining suggestion only: the considered word is a distracter when
 * that suggestion carries KIND_FLAG_EXACT_MATCH or
 * KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION in its kind.
 *
 * @param suggestionResults the suggestions to inspect, in iteration order.
 * @param consideredWord the word being tested.
 * @return true if the first suggestion that differs from the considered word has one of
 * the exact match flags; false if it has neither or if there is no such suggestion.
 */
private static boolean isDistracter(
final SuggestionResults suggestionResults, final String consideredWord) {
for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
// The word itself is not a distracter to itself; look at the next suggestion.
if (suggestedWordInfo.mWord.equals(consideredWord)) {
continue;
}
// Exact match can include case errors, accent errors, digraph conversions.
final boolean isExactMatch =
(suggestedWordInfo.mKind & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH) != 0;
final boolean isExactMatchWithIntentionalOmission = (suggestedWordInfo.mKind
& SuggestedWordInfo.KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) != 0;
// The normalized score is computed for logging only; it does not affect the result.
if (DEBUG) {
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
consideredWord, suggestedWordInfo.mWord, suggestedWordInfo.mScore);
Log.d(TAG, "consideredWord: " + consideredWord);
Log.d(TAG, "top suggestion: " + suggestedWordInfo.mWord);
Log.d(TAG, "suggestionScore: " + suggestedWordInfo.mScore);
Log.d(TAG, "normalizedScore: " + normalizedScore);
Log.d(TAG, "isExactMatch: " + isExactMatch);
Log.d(TAG, "isExactMatchWithIntentionalOmission: "
+ isExactMatchWithIntentionalOmission);
}
// Returning here ends the loop: later suggestions are never examined.
return isExactMatch || isExactMatchWithIntentionalOmission;
}
// Every suggestion (if any) was identical to the considered word.
return false;
}
@ -161,7 +163,7 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
/**
* Determine whether a word is a distracter to words in dictionaries.
*
* @param prevWordsInfo the information of previous words.
* @param prevWordsInfo the information of previous words. Not used for now.
* @param testedWord the word that will be tested to see whether it is a distracter to words
* in dictionaries.
* @param locale the locale of word.
@ -197,31 +199,20 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
final WordComposer composer = new WordComposer();
final int[] codePoints = StringUtils.toCodePointArray(testedWord);
final int[] coordinates = mKeyboard.getCoordinates(codePoints);
composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
composer.setComposingWord(codePoints, coordinates, PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
final String consideredWord = trailingSingleQuotesCount > 0 ?
testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
testedWord;
final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<>();
final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
@Override
public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
if (suggestedWords != null && suggestedWords.size() > 1) {
// The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
// the decoder is at index 1.
final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
holder.set(hasStrongDistractor);
}
}
};
mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(),
true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
null /* additionalFeaturesOptions */, 0 /* sessionId */,
SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, mKeyboard.getProximityInfo(),
true /* blockOffensiveWords */, null /* additionalFeaturesOptions */,
0 /* sessionId */, null /* rawSuggestions */);
if (suggestionResults.isEmpty()) {
return false;
}
return isDistracter(suggestionResults, consideredWord);
}
}

View File

@ -57,6 +57,7 @@ class Dictionary {
static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
dictionaryStructureWithBufferPolicy);

View File

@ -31,4 +31,8 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
const ErrorTypeUtils::ErrorType
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION;
} // namespace latinime

View File

@ -51,6 +51,11 @@ class ErrorTypeUtils {
return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
}
// Returns true when containedErrorTypes has no bits set outside of
// ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION, i.e. every contained error
// type is one that is tolerated for an exact match with intentional omission.
static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
return (containedErrorTypes
& ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
}
// Returns true when errorType has at least one bit in common with EDIT_CORRECTION.
static bool isEditCorrectionError(const ErrorType errorType) {
return (errorType & EDIT_CORRECTION) != 0;
}
@ -67,6 +72,7 @@ class ErrorTypeUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
};
} // namespace latinime
#endif // LATINIME_ERROR_TYPE_UTILS_H

View File

@ -89,6 +89,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
const bool isExactMatch =
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
const bool isExactMatchWithIntentionalOmission =
ErrorTypeUtils::isExactMatchWithIntentionalOmission(
terminalDicNode->getContainedErrorTypes());
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
// (e.g. "AMD" and "and")
@ -96,7 +99,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
const int outputTypeFlags =
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
| (isExactMatchWithIntentionalOmission ?
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
// Entries that are blacklisted or do not represent a word should not be output.
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();

View File

@ -40,34 +40,31 @@ public class DistracterFilterTest extends InputTestsBase {
final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
final Locale localeEnUs = new Locale("en", "US");
String typedWord = "alot";
// For this test case, we consider "alot" is a distracter to "a lot".
String typedWord;
typedWord = "google";
// For this test case, we consider "google" is a distracter to "Google".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "mot";
// For this test case, we consider "mot" is a distracter to "not".
typedWord = "Bill";
// For this test case, we consider "Bill" is a distracter to "bill".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "wierd";
// For this test case, we consider "wierd" is a distracter to "weird".
typedWord = "nOt";
// For this test case, we consider "nOt" is a distracter to "not".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "hoe";
// For this test case, we consider "hoe" is a distracter to "how".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "nit";
// For this test case, we consider "nit" is a distracter to "not".
typedWord = "were";
// For this test case, we consider "were" is a distracter to "we're".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "ill";
// For this test case, we consider "ill" is a distracter to "I'll".
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
// For this test case, we consider "ill" is not a distracter to any word in dictionaries.
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
typedWord = "asdfd";