/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.research; import android.os.SystemClock; import android.text.TextUtils; import android.util.JsonWriter; import android.util.Log; import com.android.inputmethod.latin.SuggestedWords; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; /** * A group of log statements related to each other. * * A LogUnit is collection of LogStatements, each of which is generated by at a particular point * in the code. (There is no LogStatement class; the data is stored across the instance variables * here.) A single LogUnit's statements can correspond to all the calls made while in the same * composing region, or all the calls between committing the last composing region, and the first * character of the next composing region. * * Individual statements in a log may be marked as potentially private. If so, then they are only * published to a ResearchLog if the ResearchLogger determines that publishing the entire LogUnit * will not violate the user's privacy. Checks for this may include whether other LogUnits have * been published recently, or whether the LogUnit contains numbers, etc. */ public class LogUnit { private static final String TAG = LogUnit.class.getSimpleName(); private static final boolean DEBUG = false && ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG; private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+"); private static final String[] EMPTY_STRING_ARRAY = new String[0]; private final ArrayList mLogStatementList; private final ArrayList mValuesList; // Assume that mTimeList is sorted in increasing order. Do not insert null values into // mTimeList. private final ArrayList mTimeList; // Words that this LogUnit generates. Should be null if the data in the LogUnit does not // generate a genuine word (i.e. separators alone do not count as a word). Should never be // empty. Note that if the user types spaces explicitly, then normally mWords should contain // only a single word; it will only contain space-separate multiple words if the user does not // enter a space, and the system enters one automatically. private String mWords; private String[] mWordArray = EMPTY_STRING_ARRAY; private boolean mMayContainDigit; private boolean mIsPartOfMegaword; private boolean mContainsCorrection; // mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the // correction. private int mCorrectionType; // LogUnits start in this state. If a word is entered without being corrected, it will have // this CorrectiontType. public static final int CORRECTIONTYPE_NO_CORRECTION = 0; // The LogUnit was corrected manually by the user in an unspecified way. public static final int CORRECTIONTYPE_CORRECTION = 1; // The LogUnit was corrected manually by the user to a word not in the list of suggestions of // the first word typed here. (Note: this is a heuristic value, it may be incorrect, for // example, if the user repositions the cursor). public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2; // The LogUnit was corrected manually by the user to a word that was in the list of suggestions // of the first word typed here. (Again, a heuristic). It is probably a typo correction. public static final int CORRECTIONTYPE_TYPO = 3; // TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's // state and statistics. This should include how many times it has been corrected, whether // other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit // previously contained a word, but was corrected to empty (because it was deleted, and there is // no known replacement). private SuggestedWords mSuggestedWords; public LogUnit() { mLogStatementList = new ArrayList(); mValuesList = new ArrayList(); mTimeList = new ArrayList(); mIsPartOfMegaword = false; mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; mSuggestedWords = null; } private LogUnit(final ArrayList logStatementList, final ArrayList valuesList, final ArrayList timeList, final boolean isPartOfMegaword) { mLogStatementList = logStatementList; mValuesList = valuesList; mTimeList = timeList; mIsPartOfMegaword = isPartOfMegaword; mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; mSuggestedWords = null; } private static final Object[] NULL_VALUES = new Object[0]; /** * Adds a new log statement. The time parameter in successive calls to this method must be * monotonically increasing, or splitByTime() will not work. */ public void addLogStatement(final LogStatement logStatement, final long time, Object... values) { if (values == null) { values = NULL_VALUES; } mLogStatementList.add(logStatement); mValuesList.add(values); mTimeList.add(time); } /** * Publish the contents of this LogUnit to {@code researchLog}. * * For each publishable {@code LogStatement}, invoke {@link LogStatement#outputToLocked}. * * @param researchLog where to publish the contents of this {@code LogUnit} * @param canIncludePrivateData whether the private data in this {@code LogUnit} should be * included */ public synchronized void publishTo(final ResearchLog researchLog, final boolean canIncludePrivateData) { // Write out any logStatement that passes the privacy filter. final int size = mLogStatementList.size(); if (size != 0) { // Note that jsonWriter is only set to a non-null value if the logUnit start text is // output and at least one logStatement is output. JsonWriter jsonWriter = null; for (int i = 0; i < size; i++) { final LogStatement logStatement = mLogStatementList.get(i); if (!canIncludePrivateData && logStatement.isPotentiallyPrivate()) { continue; } if (mIsPartOfMegaword && logStatement.isPotentiallyRevealing()) { continue; } // Only retrieve the jsonWriter if we need to. If we don't get this far, then // researchLog.getInitializedJsonWriterLocked() will not ever be called, and the // file will not have been opened for writing. if (jsonWriter == null) { jsonWriter = researchLog.getInitializedJsonWriterLocked(); outputLogUnitStart(jsonWriter, canIncludePrivateData); } logStatement.outputToLocked(jsonWriter, mTimeList.get(i), mValuesList.get(i)); } if (jsonWriter != null) { // We must have called logUnitStart earlier, so emit a logUnitStop. outputLogUnitStop(jsonWriter); } } } private static final String WORD_KEY = "_wo"; private static final String CORRECTION_TYPE_KEY = "_corType"; private static final String LOG_UNIT_BEGIN_KEY = "logUnitStart"; private static final String LOG_UNIT_END_KEY = "logUnitEnd"; final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA = new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */, false /* isPotentiallyRevealing */, WORD_KEY, CORRECTION_TYPE_KEY); final LogStatement LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA = new LogStatement(LOG_UNIT_BEGIN_KEY, false /* isPotentiallyPrivate */, false /* isPotentiallyRevealing */); private void outputLogUnitStart(final JsonWriter jsonWriter, final boolean canIncludePrivateData) { final LogStatement logStatement; if (canIncludePrivateData) { LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType()); } else { LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter, SystemClock.uptimeMillis()); } } final LogStatement LOGSTATEMENT_LOG_UNIT_END = new LogStatement(LOG_UNIT_END_KEY, false /* isPotentiallyPrivate */, false /* isPotentiallyRevealing */); private void outputLogUnitStop(final JsonWriter jsonWriter) { LOGSTATEMENT_LOG_UNIT_END.outputToLocked(jsonWriter, SystemClock.uptimeMillis()); } /** * Mark the current logUnit as containing data to generate {@code newWords}. * * If {@code setWord()} was previously called for this LogUnit, then the method will try to * determine what kind of correction it is, and update its internal state of the correctionType * accordingly. * * @param newWords The words this LogUnit generates. Caller should not pass null or the empty * string. */ public void setWords(final String newWords) { if (hasOneOrMoreWords()) { // The word was already set once, and it is now being changed. See if the new word // is close to the old word. If so, then the change is probably a typo correction. // If not, the user may have decided to enter a different word, so flag it. if (mSuggestedWords != null) { if (isInSuggestedWords(newWords, mSuggestedWords)) { mCorrectionType = CORRECTIONTYPE_TYPO; } else { mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD; } } else { // No suggested words, so it's not clear whether it's a typo or different word. // Mark it as a generic correction. mCorrectionType = CORRECTIONTYPE_CORRECTION; } } else { mCorrectionType = CORRECTIONTYPE_NO_CORRECTION; } mWords = newWords; // Update mWordArray mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY : WHITESPACE_PATTERN.split(mWords); if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) { // Empty string at beginning of array. Must have been whitespace at the start of the // word. Remove the empty string. mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length); } } public String getWordsAsString() { return mWords; } /** * Retuns the words generated by the data in this LogUnit. * * The first word may be an empty string, if the data in the LogUnit started by generating * whitespace. * * @return the array of words. an empty list of there are no words associated with this LogUnit. */ public String[] getWordsAsStringArray() { return mWordArray; } public boolean hasOneOrMoreWords() { return mWordArray.length >= 1; } public int getNumWords() { return mWordArray.length; } // TODO: Refactor to eliminate getter/setters public void setMayContainDigit() { mMayContainDigit = true; } // TODO: Refactor to eliminate getter/setters public boolean mayContainDigit() { return mMayContainDigit; } // TODO: Refactor to eliminate getter/setters public void setContainsCorrection() { mContainsCorrection = true; } // TODO: Refactor to eliminate getter/setters public boolean containsCorrection() { return mContainsCorrection; } // TODO: Refactor to eliminate getter/setters public void setCorrectionType(final int correctionType) { mCorrectionType = correctionType; } // TODO: Refactor to eliminate getter/setters public int getCorrectionType() { return mCorrectionType; } public boolean isEmpty() { return mLogStatementList.isEmpty(); } /** * Split this logUnit, with all events before maxTime staying in the current logUnit, and all * events after maxTime going into a new LogUnit that is returned. */ public LogUnit splitByTime(final long maxTime) { // Assume that mTimeList is in sorted order. final int length = mTimeList.size(); // TODO: find time by binary search, e.g. using Collections#binarySearch() for (int index = 0; index < length; index++) { if (mTimeList.get(index) > maxTime) { final List laterLogStatements = mLogStatementList.subList(index, length); final List laterValues = mValuesList.subList(index, length); final List laterTimes = mTimeList.subList(index, length); // Create the LogUnit containing the later logStatements and associated data. final LogUnit newLogUnit = new LogUnit( new ArrayList(laterLogStatements), new ArrayList(laterValues), new ArrayList(laterTimes), true /* isPartOfMegaword */); newLogUnit.mWords = null; newLogUnit.mMayContainDigit = mMayContainDigit; newLogUnit.mContainsCorrection = mContainsCorrection; // Purge the logStatements and associated data from this LogUnit. laterLogStatements.clear(); laterValues.clear(); laterTimes.clear(); mIsPartOfMegaword = true; return newLogUnit; } } return new LogUnit(); } public void append(final LogUnit logUnit) { mLogStatementList.addAll(logUnit.mLogStatementList); mValuesList.addAll(logUnit.mValuesList); mTimeList.addAll(logUnit.mTimeList); mWords = null; if (logUnit.mWords != null) { setWords(logUnit.mWords); } mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit; mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection; mIsPartOfMegaword = false; } public SuggestedWords getSuggestions() { return mSuggestedWords; } /** * Initialize the suggestions. * * Once set to a non-null value, the suggestions may not be changed again. This is to keep * track of the list of words that are close to the user's initial effort to type the word. * Only words that are close to the initial effort are considered typo corrections. */ public void initializeSuggestions(final SuggestedWords suggestedWords) { if (mSuggestedWords == null) { mSuggestedWords = suggestedWords; } } private static boolean isInSuggestedWords(final String queryWord, final SuggestedWords suggestedWords) { if (TextUtils.isEmpty(queryWord)) { return false; } final int size = suggestedWords.size(); for (int i = 0; i < size; i++) { final SuggestedWordInfo wordInfo = suggestedWords.getInfo(i); if (queryWord.equals(wordInfo.mWord)) { return true; } } return false; } /** * Remove data associated with selecting the Research button. * * A LogUnit will capture all user interactions with the IME, including the "meta-interactions" * of using the Research button to control the logging (e.g. by starting and stopping recording * of a test case). Because meta-interactions should not be part of the normal log, calling * this method will set a field in the LogStatements of the motion events to indiciate that * they should be disregarded. * * This implementation assumes that the data recorded by the meta-interaction takes the * form of all events following the first MotionEvent.ACTION_DOWN before the first long-press * before the last onCodeEvent containing a code matching {@code LogStatement.VALUE_RESEARCH}. * * @returns true if data was removed */ public boolean removeResearchButtonInvocation() { // This method is designed to be idempotent. // First, find last invocation of "research" key final int indexOfLastResearchKey = findLastIndexContainingKeyValue( LogStatement.TYPE_POINTER_TRACKER_CALL_LISTENER_ON_CODE_INPUT, LogStatement.KEY_CODE, LogStatement.VALUE_RESEARCH); if (indexOfLastResearchKey < 0) { // Could not find invocation of "research" key. Leave log as is. if (DEBUG) { Log.d(TAG, "Could not find research key"); } return false; } // Look for the long press that started the invocation of the research key code input. final int indexOfLastLongPressBeforeResearchKey = findLastIndexBefore(LogStatement.TYPE_MAIN_KEYBOARD_VIEW_ON_LONG_PRESS, indexOfLastResearchKey); // Look for DOWN event preceding the long press final int indexOfLastDownEventBeforeLongPress = findLastIndexContainingKeyValueBefore(LogStatement.TYPE_MOTION_EVENT, LogStatement.ACTION, LogStatement.VALUE_DOWN, indexOfLastLongPressBeforeResearchKey); // Flag all LatinKeyboardViewProcessMotionEvents from the DOWN event to the research key as // logging-related final int startingIndex = indexOfLastDownEventBeforeLongPress == -1 ? 0 : indexOfLastDownEventBeforeLongPress; for (int index = startingIndex; index < indexOfLastResearchKey; index++) { final LogStatement logStatement = mLogStatementList.get(index); final String type = logStatement.getType(); final Object[] values = mValuesList.get(index); if (type.equals(LogStatement.TYPE_MOTION_EVENT)) { logStatement.setValue(LogStatement.KEY_IS_LOGGING_RELATED, values, true); } } return true; } /** * Find the index of the last LogStatement before {@code startingIndex} of type {@code type}. * * @param queryType a String that must be {@code String.equals()} to the LogStatement type * @param startingIndex the index to start the backward search from. Must be less than the * length of mLogStatementList, or an IndexOutOfBoundsException is thrown. Can be negative, * in which case -1 is returned. * * @return The index of the last LogStatement, -1 if none exists. */ private int findLastIndexBefore(final String queryType, final int startingIndex) { return findLastIndexContainingKeyValueBefore(queryType, null, null, startingIndex); } /** * Find the index of the last LogStatement before {@code startingIndex} of type {@code type} * containing the given key-value pair. * * @param queryType a String that must be {@code String.equals()} to the LogStatement type * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding * value * * @return The index of the last LogStatement, -1 if none exists. */ private int findLastIndexContainingKeyValue(final String queryType, final String queryKey, final Object queryValue) { return findLastIndexContainingKeyValueBefore(queryType, queryKey, queryValue, mLogStatementList.size() - 1); } /** * Find the index of the last LogStatement before {@code startingIndex} of type {@code type} * containing the given key-value pair. * * @param queryType a String that must be {@code String.equals()} to the LogStatement type * @param queryKey a String that must be {@code String.equals()} to a key in the LogStatement * @param queryValue an Object that must be {@code String.equals()} to the key's corresponding * value * @param startingIndex the index to start the backward search from. Must be less than the * length of mLogStatementList, or an IndexOutOfBoundsException is thrown. Can be negative, * in which case -1 is returned. * * @return The index of the last LogStatement, -1 if none exists. */ private int findLastIndexContainingKeyValueBefore(final String queryType, final String queryKey, final Object queryValue, final int startingIndex) { if (startingIndex < 0) { return -1; } for (int index = startingIndex; index >= 0; index--) { final LogStatement logStatement = mLogStatementList.get(index); final String type = logStatement.getType(); if (type.equals(queryType) && (queryKey == null || logStatement.containsKeyValuePair(queryKey, queryValue, mValuesList.get(index)))) { return index; } } return -1; } }