From 0c16a5c6eef645fd536671994e0b4f05864ac338 Mon Sep 17 00:00:00 2001 From: Kurt Partridge Date: Tue, 15 Jan 2013 13:48:15 -0800 Subject: [PATCH] [Rlog81a] Determine correction type of words Heuristic to determine whether a word was a typo correction or a complete replacement by examining a correction to see if it falls within the list of suggested words of the original. Change-Id: Ieec4861a811e96aef0d14622e662b589ef8b4772 --- .../android/inputmethod/latin/LatinIME.java | 5 +- .../android/inputmethod/research/LogUnit.java | 92 ++++++++++++++++++- .../inputmethod/research/ResearchLogger.java | 19 +++- 3 files changed, 109 insertions(+), 7 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index a48778ab3..6085cb4ab 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -1647,7 +1647,7 @@ public final class LatinIME extends InputMethodService implements KeyboardAction mExpectingUpdateSelection = true; mConnection.endBatchEdit(); if (ProductionFlag.IS_EXPERIMENTAL) { - ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0); + ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0, suggestedWords); } // Space state must be updated before calling updateShiftState mSpaceState = SPACE_STATE_PHANTOM; @@ -2123,8 +2123,9 @@ public final class LatinIME extends InputMethodService implements KeyboardAction Stats.onAutoCorrection(typedWord, autoCorrection, separatorString, mWordComposer); } if (ProductionFlag.IS_EXPERIMENTAL) { + final SuggestedWords suggestedWords = mSuggestionStripView.getSuggestions(); ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection, - separatorString, mWordComposer.isBatchMode()); + separatorString, mWordComposer.isBatchMode(), suggestedWords); } mExpectingUpdateSelection = true; commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD, diff --git 
a/java/src/com/android/inputmethod/research/LogUnit.java b/java/src/com/android/inputmethod/research/LogUnit.java index 715000d28..0234bbc5b 100644 --- a/java/src/com/android/inputmethod/research/LogUnit.java +++ b/java/src/com/android/inputmethod/research/LogUnit.java @@ -17,6 +17,7 @@ package com.android.inputmethod.research; import android.content.SharedPreferences; +import android.text.TextUtils; import android.util.JsonWriter; import android.util.Log; import android.view.MotionEvent; @@ -57,16 +58,32 @@ import java.util.Map; // Assume that mTimeList is sorted in increasing order. Do not insert null values into // mTimeList. private final ArrayList mTimeList; + // Word that this LogUnit generates. Should be null if the LogUnit does not generate a genuine + // word (i.e. separators alone do not count as a word). Should never be empty. private String mWord; private boolean mMayContainDigit; private boolean mIsPartOfMegaword; private boolean mContainsCorrection; + // mCorrectionType indicates whether the word was corrected at all, and if so, whether it was + // to a different word or just a "typo" correction. It is considered a "typo" if the final + // word was listed in the suggestions available the first time the word was gestured or + // tapped. 
+ private int mCorrectionType; + public static final int CORRECTIONTYPE_NO_CORRECTION = 0; + public static final int CORRECTIONTYPE_CORRECTION = 1; + public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2; + public static final int CORRECTIONTYPE_TYPO = 3; + + private SuggestedWords mSuggestedWords; + public LogUnit() { mLogStatementList = new ArrayList(); mValuesList = new ArrayList(); mTimeList = new ArrayList(); mIsPartOfMegaword = false; + mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; + mSuggestedWords = null; } private LogUnit(final ArrayList logStatementList, @@ -77,6 +94,8 @@ import java.util.Map; mValuesList = valuesList; mTimeList = timeList; mIsPartOfMegaword = isPartOfMegaword; + mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; + mSuggestedWords = null; } private static final Object[] NULL_VALUES = new Object[0]; @@ -167,6 +186,7 @@ import java.util.Map; private static final String UPTIME_KEY = "_ut"; private static final String EVENT_TYPE_KEY = "_ty"; private static final String WORD_KEY = "_wo"; + private static final String CORRECTION_TYPE_KEY = "_corType"; private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart"; private static final String LOG_UNIT_END_KEY = "logUnitEnd"; @@ -177,6 +197,7 @@ import java.util.Map; jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis()); if (canIncludePrivateData) { jsonWriter.name(WORD_KEY).value(getWord()); + jsonWriter.name(CORRECTION_TYPE_KEY).value(getCorrectionType()); } jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY); jsonWriter.endObject(); @@ -254,7 +275,33 @@ import java.util.Map; return true; } - public void setWord(String word) { + /** + * Mark the current logUnit as containing data to generate {@code word}. + * + * If {@code setWord()} was previously called for this LogUnit, then the method will try to + * determine what kind of correction it is, and update its internal state of the correctionType + * accordingly. + * + * @param word The word this LogUnit generates. 
Caller should not pass null or the empty + * string. + */ + public void setWord(final String word) { + if (mWord != null) { + // The word was already set once, and it is now being changed. See if the new word + // is close to the old word. If so, then the change is probably a typo correction. + // If not, the user may have decided to enter a different word, so flag it. + if (mSuggestedWords != null) { + if (isInSuggestedWords(word, mSuggestedWords)) { + mCorrectionType = CORRECTIONTYPE_TYPO; + } else { + mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD; + } + } else { + // No suggested words, so it's not clear whether it's a typo or different word. + // Mark it as a generic correction. + mCorrectionType = CORRECTIONTYPE_CORRECTION; + } + } mWord = word; } @@ -282,6 +329,14 @@ import java.util.Map; return mContainsCorrection; } + public void setCorrectionType(final int correctionType) { + mCorrectionType = correctionType; + } + + public int getCorrectionType() { + return mCorrectionType; + } + public boolean isEmpty() { return mLogStatementList.isEmpty(); } @@ -328,8 +383,43 @@ import java.util.Map; mValuesList.addAll(logUnit.mValuesList); mTimeList.addAll(logUnit.mTimeList); mWord = null; + if (logUnit.mWord != null) { + setWord(logUnit.mWord); + } mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit; mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection; mIsPartOfMegaword = false; } + + public SuggestedWords getSuggestions() { + return mSuggestedWords; + } + + /** + * Initialize the suggestions. + * + * Once set to a non-null value, the suggestions may not be changed again. This is to keep + * track of the list of words that are close to the user's initial effort to type the word. + * Only words that are close to the initial effort are considered typo corrections. 
+ */ + public void initializeSuggestions(final SuggestedWords suggestedWords) { + if (mSuggestedWords == null) { + mSuggestedWords = suggestedWords; + } + } + + private static boolean isInSuggestedWords(final String queryWord, + final SuggestedWords suggestedWords) { + if (TextUtils.isEmpty(queryWord)) { + return false; + } + final int size = suggestedWords.size(); + for (int i = 0; i < size; i++) { + final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i); + if (queryWord.equals(wordInfo.mWord)) { + return true; + } + } + return false; + } } diff --git a/java/src/com/android/inputmethod/research/ResearchLogger.java b/java/src/com/android/inputmethod/research/ResearchLogger.java index 0a24af6d5..29bc70880 100644 --- a/java/src/com/android/inputmethod/research/ResearchLogger.java +++ b/java/src/com/android/inputmethod/research/ResearchLogger.java @@ -745,6 +745,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang mCurrentLogUnit.setContainsCorrection(); } + private void setCurrentLogUnitCorrectionType(final int correctionType) { + mCurrentLogUnit.setCorrectionType(correctionType); + } + /* package for test */ void commitCurrentLogUnit() { if (DEBUG) { Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ? @@ -1194,13 +1198,17 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang "suggestion", "x", "y"); public static void latinIME_pickSuggestionManually(final String replacedWord, final int index, final String suggestion, final boolean isBatchMode) { - final String scrubbedWord = scrubDigitsFromString(suggestion); final ResearchLogger researchLogger = getInstance(); + if (suggestion != null && !suggestion.equals(replacedWord)) { + // The user chose something other than what was already there. 
+ researchLogger.setCurrentLogUnitContainsCorrection(); + researchLogger.setCurrentLogUnitCorrectionType(LogUnit.CORRECTIONTYPE_TYPO); + } + final String scrubbedWord = scrubDigitsFromString(suggestion); researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_PICKSUGGESTIONMANUALLY, scrubDigitsFromString(replacedWord), index, suggestion == null ? null : scrubbedWord, Constants.SUGGESTION_STRIP_COORDINATE, Constants.SUGGESTION_STRIP_COORDINATE); - researchLogger.setCurrentLogUnitContainsCorrection(); researchLogger.commitCurrentLogUnitAsWord(scrubbedWord, Long.MAX_VALUE, isBatchMode); researchLogger.mStatistics.recordManualSuggestion(SystemClock.uptimeMillis()); } @@ -1490,10 +1498,12 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord", "autoCorrection", "separatorString"); public static void latinIme_commitCurrentAutoCorrection(final String typedWord, - final String autoCorrection, final String separatorString, final boolean isBatchMode) { + final String autoCorrection, final String separatorString, final boolean isBatchMode, + final SuggestedWords suggestedWords) { final String scrubbedTypedWord = scrubDigitsFromString(typedWord); final String scrubbedAutoCorrection = scrubDigitsFromString(autoCorrection); final ResearchLogger researchLogger = getInstance(); + researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords); researchLogger.commitCurrentLogUnitAsWord(scrubbedAutoCorrection, Long.MAX_VALUE, isBatchMode); @@ -1691,10 +1701,11 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang new LogStatement("LatinIMEOnEndBatchInput", true, false, "enteredText", "enteredWordPos"); public static void latinIME_onEndBatchInput(final CharSequence enteredText, - final int enteredWordPos) { + final int enteredWordPos, final SuggestedWords suggestedWords) { final ResearchLogger researchLogger = getInstance(); 
researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText, enteredWordPos); + researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords); researchLogger.mStatistics.recordGestureInput(enteredText.length(), SystemClock.uptimeMillis()); }