Use whether it's an exact match to detect distracters.
Bug: 13142176 Change-Id: Id5b7286d28897931f7bfe571be45d46ffeef4adf
This commit is contained in:
parent
4905d83270
commit
a6278eb9c1
11 changed files with 77 additions and 73 deletions
|
@ -78,7 +78,7 @@ public final class SuggestionSpanUtils {
|
|||
break;
|
||||
}
|
||||
final SuggestedWordInfo info = suggestedWords.getInfo(i);
|
||||
if (info.mKind == SuggestedWordInfo.KIND_PREDICTION) {
|
||||
if (info.isKindOf(SuggestedWordInfo.KIND_PREDICTION)) {
|
||||
continue;
|
||||
}
|
||||
final String word = suggestedWords.getWord(i);
|
||||
|
|
|
@ -326,13 +326,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
// offensive, then we don't output it unless it's also an exact match.
|
||||
continue;
|
||||
}
|
||||
final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
|
||||
final int score = SuggestedWordInfo.KIND_WHITELIST == kind
|
||||
? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
|
||||
// TODO: check that all users of the `kind' parameter are ready to accept
|
||||
// flags too and pass mOutputTypes[j] instead of kind
|
||||
suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
|
||||
score, kind, this /* sourceDict */,
|
||||
mOutputScores[j], mOutputTypes[j], this /* sourceDict */,
|
||||
mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
|
||||
mOutputAutoCommitFirstWordConfidence[0]));
|
||||
}
|
||||
|
|
|
@ -127,7 +127,7 @@ public final class Suggest {
|
|||
suggestionResults.first(), suggestionResults.mLocale, isAllUpperCase,
|
||||
isFirstCharCapitalized, trailingSingleQuotesCount);
|
||||
firstSuggestion = firstSuggestedWordInfo.mWord;
|
||||
if (SuggestedWordInfo.KIND_WHITELIST != firstSuggestedWordInfo.mKind) {
|
||||
if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
|
||||
whitelistedWord = null;
|
||||
} else {
|
||||
whitelistedWord = firstSuggestion;
|
||||
|
@ -158,7 +158,7 @@ public final class Suggest {
|
|||
|| suggestionResults.isEmpty() || wordComposer.hasDigits()
|
||||
|| wordComposer.isMostlyCaps() || wordComposer.isResumed()
|
||||
|| !mDictionaryFacilitator.hasInitializedMainDictionary()
|
||||
|| SuggestedWordInfo.KIND_SHORTCUT == suggestionResults.first().mKind) {
|
||||
|| suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
|
||||
// If we don't have a main dictionary, we never want to auto-correct. The reason for
|
||||
// this is, the user may have a contact whose name happens to match a valid word in
|
||||
// their language, and it will unexpectedly auto-correct. For example, if the user
|
||||
|
|
|
@ -225,13 +225,14 @@ public class SuggestedWords {
|
|||
public static final int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
|
||||
public static final int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
|
||||
public static final int KIND_FLAG_EXACT_MATCH = 0x40000000;
|
||||
public static final int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
|
||||
|
||||
public final String mWord;
|
||||
// The completion info from the application. Null for suggestions that don't come from
|
||||
// the application (including keyboard-computed ones, so this is almost always null)
|
||||
public final CompletionInfo mApplicationSpecifiedCompletionInfo;
|
||||
public final int mScore;
|
||||
public final int mKind; // one of the KIND_* constants above
|
||||
public final int mKind; // kind and kind flags
|
||||
public final int mCodePointCount;
|
||||
public final Dictionary mSourceDict;
|
||||
// For auto-commit. This keeps track of the index inside the touch coordinates array
|
||||
|
@ -247,7 +248,7 @@ public class SuggestedWords {
|
|||
* Create a new suggested word info.
|
||||
* @param word The string to suggest.
|
||||
* @param score A measure of how likely this suggestion is.
|
||||
* @param kind The kind of suggestion, as one of the above KIND_* constants.
|
||||
* @param kind The kind of suggestion, as one of the above KIND_* constants with flags.
|
||||
* @param sourceDict What instance of Dictionary produced this suggestion.
|
||||
* @param indexOfTouchPointOfSecondWord See mIndexOfTouchPointOfSecondWord.
|
||||
* @param autoCommitFirstWordConfidence See mAutoCommitFirstWordConfidence.
|
||||
|
@ -282,7 +283,11 @@ public class SuggestedWords {
|
|||
}
|
||||
|
||||
public boolean isEligibleForAutoCommit() {
|
||||
return (KIND_CORRECTION == mKind && NOT_AN_INDEX != mIndexOfTouchPointOfSecondWord);
|
||||
return (isKindOf(KIND_CORRECTION) && NOT_AN_INDEX != mIndexOfTouchPointOfSecondWord);
|
||||
}
|
||||
|
||||
public boolean isKindOf(final int kind) {
|
||||
return (mKind & KIND_MASK_KIND) == kind;
|
||||
}
|
||||
|
||||
public void setDebugString(final String str) {
|
||||
|
@ -339,7 +344,7 @@ public class SuggestedWords {
|
|||
String typedWord = null;
|
||||
for (int i = 0; i < mSuggestedWordInfoList.size(); ++i) {
|
||||
final SuggestedWordInfo info = mSuggestedWordInfoList.get(i);
|
||||
if (SuggestedWordInfo.KIND_TYPED != info.mKind) {
|
||||
if (!info.isKindOf(SuggestedWordInfo.KIND_TYPED)) {
|
||||
newSuggestions.add(info);
|
||||
} else {
|
||||
assert(null == typedWord);
|
||||
|
|
|
@ -270,7 +270,7 @@ public final class InputLogic {
|
|||
// code path as for other kinds, use commitChosenWord, and do everything normally. We will
|
||||
// however need to reset the suggestion strip right away, because we know we can't take
|
||||
// the risk of calling commitCompletion twice because we don't know how the app will react.
|
||||
if (SuggestedWordInfo.KIND_APP_DEFINED == suggestionInfo.mKind) {
|
||||
if (suggestionInfo.isKindOf(SuggestedWordInfo.KIND_APP_DEFINED)) {
|
||||
mSuggestedWords = SuggestedWords.EMPTY;
|
||||
mSuggestionStripViewAccessor.setNeutralSuggestionStrip();
|
||||
inputTransaction.requireShiftUpdate(InputTransaction.SHIFT_UPDATE_NOW);
|
||||
|
|
|
@ -32,12 +32,8 @@ import android.view.inputmethod.InputMethodSubtype;
|
|||
import com.android.inputmethod.keyboard.Keyboard;
|
||||
import com.android.inputmethod.keyboard.KeyboardId;
|
||||
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.DictionaryFacilitator;
|
||||
import com.android.inputmethod.latin.PrevWordsInfo;
|
||||
import com.android.inputmethod.latin.Suggest;
|
||||
import com.android.inputmethod.latin.Suggest.OnGetSuggestedWordsCallback;
|
||||
import com.android.inputmethod.latin.SuggestedWords;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.WordComposer;
|
||||
|
||||
|
@ -47,6 +43,7 @@ import com.android.inputmethod.latin.WordComposer;
|
|||
*/
|
||||
public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
||||
private static final String TAG = DistracterFilterUsingSuggestion.class.getSimpleName();
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
|
||||
|
||||
|
@ -54,17 +51,9 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
|||
private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
|
||||
private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
|
||||
private final DictionaryFacilitator mDictionaryFacilitator;
|
||||
private final Suggest mSuggest;
|
||||
private Keyboard mKeyboard;
|
||||
private final Object mLock = new Object();
|
||||
|
||||
// If the score of the top suggestion exceeds this value, the tested word (e.g.,
|
||||
// an OOV, a misspelling, or an in-vocabulary word) would be considered as a distracter to
|
||||
// words in dictionary. The greater the threshold is, the less likely the tested word would
|
||||
// become a distracter, which means the tested word will be more likely to be added to
|
||||
// the dictionary.
|
||||
private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 2.0f;
|
||||
|
||||
/**
|
||||
* Create a DistracterFilter instance.
|
||||
*
|
||||
|
@ -75,7 +64,6 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
|||
mLocaleToSubtypeMap = new HashMap<>();
|
||||
mLocaleToKeyboardMap = new HashMap<>();
|
||||
mDictionaryFacilitator = new DictionaryFacilitator();
|
||||
mSuggest = new Suggest(mDictionaryFacilitator);
|
||||
mKeyboard = null;
|
||||
}
|
||||
|
||||
|
@ -109,16 +97,30 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
|||
}
|
||||
}
|
||||
|
||||
private static boolean suggestionExceedsDistracterThreshold(
|
||||
final SuggestedWordInfo suggestion, final String consideredWord,
|
||||
final float distracterThreshold) {
|
||||
if (null != suggestion) {
|
||||
final int suggestionScore = suggestion.mScore;
|
||||
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
|
||||
consideredWord, suggestion.mWord, suggestionScore);
|
||||
if (normalizedScore > distracterThreshold) {
|
||||
return true;
|
||||
private static boolean isDistracter(
|
||||
final SuggestionResults suggestionResults, final String consideredWord) {
|
||||
for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
|
||||
if (suggestedWordInfo.mWord.equals(consideredWord)) {
|
||||
continue;
|
||||
}
|
||||
// Exact match can include case errors, accent errors, digraph conversions.
|
||||
final boolean isExactMatch =
|
||||
(suggestedWordInfo.mKind & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH) != 0;
|
||||
final boolean isExactMatchWithIntentionalOmission = (suggestedWordInfo.mKind
|
||||
& SuggestedWordInfo.KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) != 0;
|
||||
|
||||
if (DEBUG) {
|
||||
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
|
||||
consideredWord, suggestedWordInfo.mWord, suggestedWordInfo.mScore);
|
||||
Log.d(TAG, "consideredWord: " + consideredWord);
|
||||
Log.d(TAG, "top suggestion: " + suggestedWordInfo.mWord);
|
||||
Log.d(TAG, "suggestionScore: " + suggestedWordInfo.mScore);
|
||||
Log.d(TAG, "normalizedScore: " + normalizedScore);
|
||||
Log.d(TAG, "isExactMatch: " + isExactMatch);
|
||||
Log.d(TAG, "isExactMatchWithIntentionalOmission: "
|
||||
+ isExactMatchWithIntentionalOmission);
|
||||
}
|
||||
return isExactMatch || isExactMatchWithIntentionalOmission;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -161,7 +163,7 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
|||
/**
|
||||
* Determine whether a word is a distracter to words in dictionaries.
|
||||
*
|
||||
* @param prevWordsInfo the information of previous words.
|
||||
* @param prevWordsInfo the information of previous words. Not used for now.
|
||||
* @param testedWord the word that will be tested to see whether it is a distracter to words
|
||||
* in dictionaries.
|
||||
* @param locale the locale of word.
|
||||
|
@ -197,31 +199,20 @@ public class DistracterFilterUsingSuggestion implements DistracterFilter {
|
|||
final WordComposer composer = new WordComposer();
|
||||
final int[] codePoints = StringUtils.toCodePointArray(testedWord);
|
||||
final int[] coordinates = mKeyboard.getCoordinates(codePoints);
|
||||
composer.setComposingWord(codePoints, coordinates, prevWordsInfo);
|
||||
composer.setComposingWord(codePoints, coordinates, PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
|
||||
|
||||
final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
|
||||
final String consideredWord = trailingSingleQuotesCount > 0 ?
|
||||
testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
|
||||
testedWord;
|
||||
final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<>();
|
||||
final OnGetSuggestedWordsCallback callback = new OnGetSuggestedWordsCallback() {
|
||||
@Override
|
||||
public void onGetSuggestedWords(final SuggestedWords suggestedWords) {
|
||||
if (suggestedWords != null && suggestedWords.size() > 1) {
|
||||
// The suggestedWordInfo at 0 is the typed word. The 1st suggestion from
|
||||
// the decoder is at index 1.
|
||||
final SuggestedWordInfo firstSuggestion = suggestedWords.getInfo(1);
|
||||
final boolean hasStrongDistractor = suggestionExceedsDistracterThreshold(
|
||||
firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
|
||||
holder.set(hasStrongDistractor);
|
||||
}
|
||||
}
|
||||
};
|
||||
mSuggest.getSuggestedWords(composer, prevWordsInfo, mKeyboard.getProximityInfo(),
|
||||
true /* blockOffensiveWords */, true /* isCorrectionEnbaled */,
|
||||
null /* additionalFeaturesOptions */, 0 /* sessionId */,
|
||||
SuggestedWords.NOT_A_SEQUENCE_NUMBER, callback);
|
||||
|
||||
return holder.get(false /* defaultValue */, Constants.GET_SUGGESTED_WORDS_TIMEOUT);
|
||||
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
|
||||
composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, mKeyboard.getProximityInfo(),
|
||||
true /* blockOffensiveWords */, null /* additionalFeaturesOptions */,
|
||||
0 /* sessionId */, null /* rawSuggestions */);
|
||||
if (suggestionResults.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
return isDistracter(suggestionResults, consideredWord);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ class Dictionary {
|
|||
static const int KIND_MASK_FLAGS = 0xFFFFFF00; // Mask to get the flags
|
||||
static const int KIND_FLAG_POSSIBLY_OFFENSIVE = 0x80000000;
|
||||
static const int KIND_FLAG_EXACT_MATCH = 0x40000000;
|
||||
static const int KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION = 0x20000000;
|
||||
|
||||
Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
||||
dictionaryStructureWithBufferPolicy);
|
||||
|
|
|
@ -31,4 +31,8 @@ const ErrorTypeUtils::ErrorType ErrorTypeUtils::NEW_WORD = 0x80;
|
|||
const ErrorTypeUtils::ErrorType ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH =
|
||||
NOT_AN_ERROR | MATCH_WITH_CASE_ERROR | MATCH_WITH_ACCENT_ERROR | MATCH_WITH_DIGRAPH;
|
||||
|
||||
const ErrorTypeUtils::ErrorType
|
||||
ErrorTypeUtils::ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION =
|
||||
ERRORS_TREATED_AS_AN_EXACT_MATCH | INTENTIONAL_OMISSION;
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -51,6 +51,11 @@ class ErrorTypeUtils {
|
|||
return (containedErrorTypes & ~ERRORS_TREATED_AS_AN_EXACT_MATCH) == 0;
|
||||
}
|
||||
|
||||
static bool isExactMatchWithIntentionalOmission(const ErrorType containedErrorTypes) {
|
||||
return (containedErrorTypes
|
||||
& ~ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION) == 0;
|
||||
}
|
||||
|
||||
static bool isEditCorrectionError(const ErrorType errorType) {
|
||||
return (errorType & EDIT_CORRECTION) != 0;
|
||||
}
|
||||
|
@ -67,6 +72,7 @@ class ErrorTypeUtils {
|
|||
DISALLOW_IMPLICIT_CONSTRUCTORS(ErrorTypeUtils);
|
||||
|
||||
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH;
|
||||
static const ErrorType ERRORS_TREATED_AS_AN_EXACT_MATCH_WITH_INTENTIONAL_OMISSION;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_ERROR_TYPE_UTILS_H
|
||||
|
|
|
@ -89,6 +89,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
|||
terminalDicNode->getProbability(), NOT_A_PROBABILITY) <= 0;
|
||||
const bool isExactMatch =
|
||||
ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
|
||||
const bool isExactMatchWithIntentionalOmission =
|
||||
ErrorTypeUtils::isExactMatchWithIntentionalOmission(
|
||||
terminalDicNode->getContainedErrorTypes());
|
||||
const bool isFirstCharUppercase = terminalDicNode->isFirstCharUppercase();
|
||||
// Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
|
||||
// (e.g. "AMD" and "and")
|
||||
|
@ -96,7 +99,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
|
|||
&& !(isPossiblyOffensiveWord && isFirstCharUppercase);
|
||||
const int outputTypeFlags =
|
||||
(isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
|
||||
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0);
|
||||
| ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
|
||||
| (isExactMatchWithIntentionalOmission ?
|
||||
Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);
|
||||
|
||||
// Entries that are blacklisted or do not represent a word should not be output.
|
||||
const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
|
||||
|
|
|
@ -40,34 +40,31 @@ public class DistracterFilterTest extends InputTestsBase {
|
|||
final PrevWordsInfo EMPTY_PREV_WORDS_INFO = PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
||||
|
||||
final Locale localeEnUs = new Locale("en", "US");
|
||||
String typedWord = "alot";
|
||||
// For this test case, we consider "alot" is a distracter to "a lot".
|
||||
String typedWord;
|
||||
|
||||
typedWord = "google";
|
||||
// For this test case, we consider "google" is a distracter to "Google".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "mot";
|
||||
// For this test case, we consider "mot" is a distracter to "not".
|
||||
typedWord = "Bill";
|
||||
// For this test case, we consider "Bill" is a distracter to "bill".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "wierd";
|
||||
// For this test case, we consider "wierd" is a distracter to "weird".
|
||||
typedWord = "nOt";
|
||||
// For this test case, we consider "nOt" is a distracter to "not".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "hoe";
|
||||
// For this test case, we consider "hoe" is a distracter to "how".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "nit";
|
||||
// For this test case, we consider "nit" is a distracter to "not".
|
||||
typedWord = "were";
|
||||
// For this test case, we consider "were" is a distracter to "we're".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "ill";
|
||||
// For this test case, we consider "ill" is a distracter to "I'll".
|
||||
assertTrue(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
// For this test case, we consider "ill" is not a distracter to any word in dictionaries.
|
||||
assertFalse(mDistracterFilter.isDistracterToWordsInDictionaries(
|
||||
EMPTY_PREV_WORDS_INFO, typedWord, localeEnUs));
|
||||
|
||||
typedWord = "asdfd";
|
||||
|
|
Loading…
Reference in a new issue