[Rlog81a] Determine correction type of words

Heuristic to determine whether a word was a typo correction or a complete
replacement by examining a correction to see if it falls within the list of
suggested words of the original.

Change-Id: Ieec4861a811e96aef0d14622e662b589ef8b4772
main
Kurt Partridge 2013-01-15 13:48:15 -08:00
parent 8b788374de
commit 0c16a5c6ee
3 changed files with 109 additions and 7 deletions

View File

@ -1647,7 +1647,7 @@ public final class LatinIME extends InputMethodService implements KeyboardAction
mExpectingUpdateSelection = true; mExpectingUpdateSelection = true;
mConnection.endBatchEdit(); mConnection.endBatchEdit();
if (ProductionFlag.IS_EXPERIMENTAL) { if (ProductionFlag.IS_EXPERIMENTAL) {
ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0); ResearchLogger.latinIME_onEndBatchInput(batchInputText, 0, suggestedWords);
} }
// Space state must be updated before calling updateShiftState // Space state must be updated before calling updateShiftState
mSpaceState = SPACE_STATE_PHANTOM; mSpaceState = SPACE_STATE_PHANTOM;
@ -2123,8 +2123,9 @@ public final class LatinIME extends InputMethodService implements KeyboardAction
Stats.onAutoCorrection(typedWord, autoCorrection, separatorString, mWordComposer); Stats.onAutoCorrection(typedWord, autoCorrection, separatorString, mWordComposer);
} }
if (ProductionFlag.IS_EXPERIMENTAL) { if (ProductionFlag.IS_EXPERIMENTAL) {
final SuggestedWords suggestedWords = mSuggestionStripView.getSuggestions();
ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection, ResearchLogger.latinIme_commitCurrentAutoCorrection(typedWord, autoCorrection,
separatorString, mWordComposer.isBatchMode()); separatorString, mWordComposer.isBatchMode(), suggestedWords);
} }
mExpectingUpdateSelection = true; mExpectingUpdateSelection = true;
commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD, commitChosenWord(autoCorrection, LastComposedWord.COMMIT_TYPE_DECIDED_WORD,

View File

@ -17,6 +17,7 @@
package com.android.inputmethod.research; package com.android.inputmethod.research;
import android.content.SharedPreferences; import android.content.SharedPreferences;
import android.text.TextUtils;
import android.util.JsonWriter; import android.util.JsonWriter;
import android.util.Log; import android.util.Log;
import android.view.MotionEvent; import android.view.MotionEvent;
@ -57,16 +58,32 @@ import java.util.Map;
// Assume that mTimeList is sorted in increasing order. Do not insert null values into // Assume that mTimeList is sorted in increasing order. Do not insert null values into
// mTimeList. // mTimeList.
private final ArrayList<Long> mTimeList; private final ArrayList<Long> mTimeList;
// Word that this LogUnit generates. Should be null if the LogUnit does not generate a genuine
// word (i.e. separators alone do not count as a word). Should never be empty.
private String mWord; private String mWord;
private boolean mMayContainDigit; private boolean mMayContainDigit;
private boolean mIsPartOfMegaword; private boolean mIsPartOfMegaword;
private boolean mContainsCorrection; private boolean mContainsCorrection;
// mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
// to a different word or just a "typo" correction. It is considered a "typo" if the final
// word was listed in the suggestions available the first time the word was gestured or
// tapped.
private int mCorrectionType;
public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
public static final int CORRECTIONTYPE_CORRECTION = 1;
public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
public static final int CORRECTIONTYPE_TYPO = 3;
private SuggestedWords mSuggestedWords;
public LogUnit() { public LogUnit() {
mLogStatementList = new ArrayList<LogStatement>(); mLogStatementList = new ArrayList<LogStatement>();
mValuesList = new ArrayList<Object[]>(); mValuesList = new ArrayList<Object[]>();
mTimeList = new ArrayList<Long>(); mTimeList = new ArrayList<Long>();
mIsPartOfMegaword = false; mIsPartOfMegaword = false;
mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
mSuggestedWords = null;
} }
private LogUnit(final ArrayList<LogStatement> logStatementList, private LogUnit(final ArrayList<LogStatement> logStatementList,
@ -77,6 +94,8 @@ import java.util.Map;
mValuesList = valuesList; mValuesList = valuesList;
mTimeList = timeList; mTimeList = timeList;
mIsPartOfMegaword = isPartOfMegaword; mIsPartOfMegaword = isPartOfMegaword;
mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
mSuggestedWords = null;
} }
private static final Object[] NULL_VALUES = new Object[0]; private static final Object[] NULL_VALUES = new Object[0];
@ -167,6 +186,7 @@ import java.util.Map;
private static final String UPTIME_KEY = "_ut"; private static final String UPTIME_KEY = "_ut";
private static final String EVENT_TYPE_KEY = "_ty"; private static final String EVENT_TYPE_KEY = "_ty";
private static final String WORD_KEY = "_wo"; private static final String WORD_KEY = "_wo";
private static final String CORRECTION_TYPE_KEY = "_corType";
private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart"; private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart";
private static final String LOG_UNIT_END_KEY = "logUnitEnd"; private static final String LOG_UNIT_END_KEY = "logUnitEnd";
@ -177,6 +197,7 @@ import java.util.Map;
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis()); jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
if (canIncludePrivateData) { if (canIncludePrivateData) {
jsonWriter.name(WORD_KEY).value(getWord()); jsonWriter.name(WORD_KEY).value(getWord());
jsonWriter.name(CORRECTION_TYPE_KEY).value(getCorrectionType());
} }
jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY); jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
jsonWriter.endObject(); jsonWriter.endObject();
@ -254,7 +275,33 @@ import java.util.Map;
return true; return true;
} }
public void setWord(String word) { /**
* Mark the current logUnit as containing data to generate {@code word}.
*
* If {@code setWord()} was previously called for this LogUnit, then the method will try to
* determine what kind of correction it is, and update its internal state of the correctionType
* accordingly.
*
* @param word The word this LogUnit generates. Caller should not pass null or the empty
* string.
*/
public void setWord(final String word) {
if (mWord != null) {
// The word was already set once, and it is now being changed. See if the new word
// is close to the old word. If so, then the change is probably a typo correction.
// If not, the user may have decided to enter a different word, so flag it.
if (mSuggestedWords != null) {
if (isInSuggestedWords(word, mSuggestedWords)) {
mCorrectionType = CORRECTIONTYPE_TYPO;
} else {
mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
}
} else {
// No suggested words, so it's not clear whether it's a typo or different word.
// Mark it as a generic correction.
mCorrectionType = CORRECTIONTYPE_CORRECTION;
}
}
mWord = word; mWord = word;
} }
@ -282,6 +329,14 @@ import java.util.Map;
return mContainsCorrection; return mContainsCorrection;
} }
/**
 * Overrides the correction type recorded for this LogUnit.
 *
 * @param correctionType the new correction type; expected to be one of the
 *         {@code CORRECTIONTYPE_*} constants, although no validation is performed here.
 */
public void setCorrectionType(final int correctionType) {
    this.mCorrectionType = correctionType;
}
/**
 * Returns the correction type currently recorded for this LogUnit.
 *
 * @return the stored correction type value (see the {@code CORRECTIONTYPE_*} constants).
 */
public int getCorrectionType() {
    return this.mCorrectionType;
}
public boolean isEmpty() { public boolean isEmpty() {
return mLogStatementList.isEmpty(); return mLogStatementList.isEmpty();
} }
@ -328,8 +383,43 @@ import java.util.Map;
mValuesList.addAll(logUnit.mValuesList); mValuesList.addAll(logUnit.mValuesList);
mTimeList.addAll(logUnit.mTimeList); mTimeList.addAll(logUnit.mTimeList);
mWord = null; mWord = null;
if (logUnit.mWord != null) {
setWord(logUnit.mWord);
}
mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit; mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection; mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;
mIsPartOfMegaword = false; mIsPartOfMegaword = false;
} }
/**
 * Returns the suggestions stored for this LogUnit.
 *
 * @return the stored {@code SuggestedWords}, or null if none have been initialized.
 */
public SuggestedWords getSuggestions() {
    return this.mSuggestedWords;
}
/**
 * Record the suggestions associated with this LogUnit, if none are recorded yet.
 *
 * Only the first non-null value sticks: after that, later calls leave the stored suggestions
 * untouched. The intent is to remember the words that were close to the user's initial
 * attempt at typing the word, because only those words count as typo corrections.
 *
 * @param suggestedWords the suggestions to store; ignored if suggestions are already set.
 */
public void initializeSuggestions(final SuggestedWords suggestedWords) {
    if (mSuggestedWords != null) {
        // Already initialized; keep the suggestions from the initial effort.
        return;
    }
    mSuggestedWords = suggestedWords;
}
/**
 * Checks whether {@code queryWord} exactly matches the word of any entry in
 * {@code suggestedWords}.
 *
 * @param queryWord the word to look for; a null or empty word never matches.
 * @param suggestedWords the suggestions to scan; must not be null.
 * @return true if some entry's {@code mWord} equals {@code queryWord}, false otherwise.
 */
private static boolean isInSuggestedWords(final String queryWord,
        final SuggestedWords suggestedWords) {
    if (!TextUtils.isEmpty(queryWord)) {
        final int count = suggestedWords.size();
        int index = 0;
        while (index < count) {
            final SuggestedWordInfo candidate = suggestedWords.getInfo(index);
            if (queryWord.equals(candidate.mWord)) {
                return true;
            }
            index++;
        }
    }
    return false;
}
} }

View File

@ -745,6 +745,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
mCurrentLogUnit.setContainsCorrection(); mCurrentLogUnit.setContainsCorrection();
} }
/**
 * Forwards {@code correctionType} to the current log unit's {@code setCorrectionType}.
 *
 * @param correctionType the correction type to record on the current log unit.
 */
private void setCurrentLogUnitCorrectionType(final int correctionType) {
    this.mCurrentLogUnit.setCorrectionType(correctionType);
}
/* package for test */ void commitCurrentLogUnit() { /* package for test */ void commitCurrentLogUnit() {
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ? Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
@ -1194,13 +1198,17 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
"suggestion", "x", "y"); "suggestion", "x", "y");
public static void latinIME_pickSuggestionManually(final String replacedWord, public static void latinIME_pickSuggestionManually(final String replacedWord,
final int index, final String suggestion, final boolean isBatchMode) { final int index, final String suggestion, final boolean isBatchMode) {
final String scrubbedWord = scrubDigitsFromString(suggestion);
final ResearchLogger researchLogger = getInstance(); final ResearchLogger researchLogger = getInstance();
if (!replacedWord.equals(suggestion.toString())) {
// The user chose something other than what was already there.
researchLogger.setCurrentLogUnitContainsCorrection();
researchLogger.setCurrentLogUnitCorrectionType(LogUnit.CORRECTIONTYPE_TYPO);
}
final String scrubbedWord = scrubDigitsFromString(suggestion);
researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_PICKSUGGESTIONMANUALLY, researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_PICKSUGGESTIONMANUALLY,
scrubDigitsFromString(replacedWord), index, scrubDigitsFromString(replacedWord), index,
suggestion == null ? null : scrubbedWord, Constants.SUGGESTION_STRIP_COORDINATE, suggestion == null ? null : scrubbedWord, Constants.SUGGESTION_STRIP_COORDINATE,
Constants.SUGGESTION_STRIP_COORDINATE); Constants.SUGGESTION_STRIP_COORDINATE);
researchLogger.setCurrentLogUnitContainsCorrection();
researchLogger.commitCurrentLogUnitAsWord(scrubbedWord, Long.MAX_VALUE, isBatchMode); researchLogger.commitCurrentLogUnitAsWord(scrubbedWord, Long.MAX_VALUE, isBatchMode);
researchLogger.mStatistics.recordManualSuggestion(SystemClock.uptimeMillis()); researchLogger.mStatistics.recordManualSuggestion(SystemClock.uptimeMillis());
} }
@ -1490,10 +1498,12 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord", new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
"autoCorrection", "separatorString"); "autoCorrection", "separatorString");
public static void latinIme_commitCurrentAutoCorrection(final String typedWord, public static void latinIme_commitCurrentAutoCorrection(final String typedWord,
final String autoCorrection, final String separatorString, final boolean isBatchMode) { final String autoCorrection, final String separatorString, final boolean isBatchMode,
final SuggestedWords suggestedWords) {
final String scrubbedTypedWord = scrubDigitsFromString(typedWord); final String scrubbedTypedWord = scrubDigitsFromString(typedWord);
final String scrubbedAutoCorrection = scrubDigitsFromString(autoCorrection); final String scrubbedAutoCorrection = scrubDigitsFromString(autoCorrection);
final ResearchLogger researchLogger = getInstance(); final ResearchLogger researchLogger = getInstance();
researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords);
researchLogger.commitCurrentLogUnitAsWord(scrubbedAutoCorrection, Long.MAX_VALUE, researchLogger.commitCurrentLogUnitAsWord(scrubbedAutoCorrection, Long.MAX_VALUE,
isBatchMode); isBatchMode);
@ -1691,10 +1701,11 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
new LogStatement("LatinIMEOnEndBatchInput", true, false, "enteredText", new LogStatement("LatinIMEOnEndBatchInput", true, false, "enteredText",
"enteredWordPos"); "enteredWordPos");
public static void latinIME_onEndBatchInput(final CharSequence enteredText, public static void latinIME_onEndBatchInput(final CharSequence enteredText,
final int enteredWordPos) { final int enteredWordPos, final SuggestedWords suggestedWords) {
final ResearchLogger researchLogger = getInstance(); final ResearchLogger researchLogger = getInstance();
researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText, researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
enteredWordPos); enteredWordPos);
researchLogger.mCurrentLogUnit.initializeSuggestions(suggestedWords);
researchLogger.mStatistics.recordGestureInput(enteredText.length(), researchLogger.mStatistics.recordGestureInput(enteredText.length(),
SystemClock.uptimeMillis()); SystemClock.uptimeMillis());
} }