Merge "Allow LogUnits to hold >1 word"

main
Kurt Partridge 2013-04-19 16:04:12 +00:00 committed by Android (Google) Code Review
commit 41b34cf2ae
4 changed files with 147 additions and 86 deletions

View File

@ -57,28 +57,29 @@ public class FixedLogBuffer extends LogBuffer {
*/
@Override
public void shiftIn(final LogUnit newLogUnit) {
if (!newLogUnit.hasWord()) {
// This LogUnit isn't a word, so it doesn't count toward the word-limit.
if (!newLogUnit.hasOneOrMoreWords()) {
// This LogUnit doesn't contain any word, so it doesn't count toward the word-limit.
super.shiftIn(newLogUnit);
return;
}
final int numWordsIncoming = newLogUnit.getNumWords();
if (mNumActualWords >= mWordCapacity) {
// Give subclass a chance to handle the buffer full condition by shifting out logUnits.
onBufferFull();
// If still full, evict.
if (mNumActualWords >= mWordCapacity) {
shiftOutWords(1);
shiftOutWords(numWordsIncoming);
}
}
super.shiftIn(newLogUnit);
mNumActualWords++; // Must be a word, or we wouldn't be here.
mNumActualWords += numWordsIncoming;
}
@Override
public LogUnit unshiftIn() {
final LogUnit logUnit = super.unshiftIn();
if (logUnit != null && logUnit.hasWord()) {
mNumActualWords--;
if (logUnit != null && logUnit.hasOneOrMoreWords()) {
mNumActualWords -= logUnit.getNumWords();
}
return logUnit;
}
@ -109,8 +110,8 @@ public class FixedLogBuffer extends LogBuffer {
@Override
public LogUnit shiftOut() {
final LogUnit logUnit = super.shiftOut();
if (logUnit != null && logUnit.hasWord()) {
mNumActualWords--;
if (logUnit != null && logUnit.hasOneOrMoreWords()) {
mNumActualWords -= logUnit.getNumWords();
}
return logUnit;
}
@ -121,15 +122,15 @@ public class FixedLogBuffer extends LogBuffer {
* If there are fewer than {@code numWords} word-containing {@link LogUnit}s, shifts out
* all {@code LogUnit}s in the buffer.
*
* @param numWords the number of word-containing {@link LogUnit}s to shift out
* @param numWords the minimum number of word-containing {@link LogUnit}s to shift out
* @return the number of actual {@code LogUnit}s shifted out
*/
protected int shiftOutWords(final int numWords) {
int numWordContainingLogUnitsShiftedOut = 0;
for (LogUnit logUnit = shiftOut(); logUnit != null
&& numWordContainingLogUnitsShiftedOut < numWords; logUnit = shiftOut()) {
if (logUnit.hasWord()) {
numWordContainingLogUnitsShiftedOut++;
if (logUnit.hasOneOrMoreWords()) {
numWordContainingLogUnitsShiftedOut += logUnit.getNumWords();
}
}
return numWordContainingLogUnitsShiftedOut;
@ -144,27 +145,31 @@ public class FixedLogBuffer extends LogBuffer {
}
/**
* Returns a list of {@link LogUnit}s at the front of the buffer that have associated words. No
* more than {@code n} LogUnits will have words associated with them. If there are not enough
* LogUnits in the buffer to meet the word requirement, returns all the LogUnits.
* Returns a list of {@link LogUnit}s at the front of the buffer that have words associated with
* them.
*
* There will be no more than {@code n} words in the returned list. So if 2 words are
* requested, and the first LogUnit has 3 words, it is not returned. If 2 words are requested,
* and the first LogUnit has only 1 word, and the next LogUnit 2 words, only the first LogUnit
* is returned. If the first LogUnit has no words associated with it, and the second LogUnit
* has three words, then only the first LogUnit (which has no associated words) is returned. If
* there are not enough LogUnits in the buffer to meet the word requirement, then all LogUnits
* will be returned.
*
* @param n The maximum total number of words contained in the returned {@link LogUnit}s.
* @return The list of the {@link LogUnit}s containing the first n words
*/
public ArrayList<LogUnit> peekAtFirstNWords(int n) {
final LinkedList<LogUnit> logUnits = getLogUnits();
final int length = logUnits.size();
// Allocate space for n*2 logUnits. There will be at least n, one for each word, and
// there may be additional for punctuation, between-word commands, etc. This should be
// enough that reallocation won't be necessary.
final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
for (int i = 0; i < length && n > 0; i++) {
final LogUnit logUnit = logUnits.get(i);
list.add(logUnit);
if (logUnit.hasWord()) {
n--;
}
final ArrayList<LogUnit> resultList = new ArrayList<LogUnit>(n * 2);
for (final LogUnit logUnit : logUnits) {
n -= logUnit.getNumWords();
if (n < 0) break;
resultList.add(logUnit);
}
return list;
return resultList;
}
}

View File

@ -25,10 +25,10 @@ import com.android.inputmethod.latin.SuggestedWords;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
/**
* A group of log statements related to each other.
@ -49,27 +49,45 @@ public class LogUnit {
private static final boolean DEBUG = false
&& ProductionFlag.USES_DEVELOPMENT_ONLY_DIAGNOSTICS_DEBUG;
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
private static final String[] EMPTY_STRING_ARRAY = new String[0];
private final ArrayList<LogStatement> mLogStatementList;
private final ArrayList<Object[]> mValuesList;
// Assume that mTimeList is sorted in increasing order. Do not insert null values into
// mTimeList.
private final ArrayList<Long> mTimeList;
// Word that this LogUnit generates. Should be null if the LogUnit does not generate a genuine
// word (i.e. separators alone do not count as a word). Should never be empty.
private String mWord;
// Words that this LogUnit generates. Should be null if the data in the LogUnit does not
// generate a genuine word (i.e. separators alone do not count as a word). Should never be
// empty. Note that if the user types spaces explicitly, then normally mWords should contain
// only a single word; it will only contain multiple space-separated words if the user does not
// enter a space, and the system enters one automatically.
private String mWords;
private String[] mWordArray = EMPTY_STRING_ARRAY;
private boolean mMayContainDigit;
private boolean mIsPartOfMegaword;
private boolean mContainsCorrection;
// mCorrectionType indicates whether the word was corrected at all, and if so, whether it was
// to a different word or just a "typo" correction. It is considered a "typo" if the final
// word was listed in the suggestions available the first time the word was gestured or
// tapped.
// mCorrectionType indicates whether the word was corrected at all, and if so, the nature of the
// correction.
private int mCorrectionType;
// LogUnits start in this state. If a word is entered without being corrected, it will have
// this CorrectionType.
public static final int CORRECTIONTYPE_NO_CORRECTION = 0;
// The LogUnit was corrected manually by the user in an unspecified way.
public static final int CORRECTIONTYPE_CORRECTION = 1;
// The LogUnit was corrected manually by the user to a word not in the list of suggestions of
// the first word typed here. (Note: this is a heuristic value, it may be incorrect, for
// example, if the user repositions the cursor).
public static final int CORRECTIONTYPE_DIFFERENT_WORD = 2;
// The LogUnit was corrected manually by the user to a word that was in the list of suggestions
// of the first word typed here. (Again, a heuristic). It is probably a typo correction.
public static final int CORRECTIONTYPE_TYPO = 3;
// TODO: Rather than just tracking the current state, keep a historical record of the LogUnit's
// state and statistics. This should include how many times it has been corrected, whether
// other LogUnit edits were done between edits to this LogUnit, etc. Also track when a LogUnit
// previously contained a word, but was corrected to empty (because it was deleted, and there is
// no known replacement).
private SuggestedWords mSuggestedWords;
@ -166,7 +184,7 @@ public class LogUnit {
final LogStatement logStatement;
if (canIncludePrivateData) {
LOGSTATEMENT_LOG_UNIT_BEGIN_WITH_PRIVATE_DATA.outputToLocked(jsonWriter,
SystemClock.uptimeMillis(), getWord(), getCorrectionType());
SystemClock.uptimeMillis(), getWordsAsString(), getCorrectionType());
} else {
LOGSTATEMENT_LOG_UNIT_BEGIN_WITHOUT_PRIVATE_DATA.outputToLocked(jsonWriter,
SystemClock.uptimeMillis());
@ -181,22 +199,22 @@ public class LogUnit {
}
/**
* Mark the current logUnit as containing data to generate {@code word}.
* Mark the current logUnit as containing data to generate {@code newWords}.
*
* If {@code setWords()} was previously called for this LogUnit, then the method will try to
* determine what kind of correction it is, and update its internal state of the correctionType
* accordingly.
*
* @param word The word this LogUnit generates. Caller should not pass null or the empty
* @param newWords The words this LogUnit generates. Caller should not pass null or the empty
* string.
*/
public void setWord(final String word) {
if (hasWord()) {
public void setWords(final String newWords) {
if (hasOneOrMoreWords()) {
// The word was already set once, and it is now being changed. See if the new word
// is close to the old word. If so, then the change is probably a typo correction.
// If not, the user may have decided to enter a different word, so flag it.
if (mSuggestedWords != null) {
if (isInSuggestedWords(word, mSuggestedWords)) {
if (isInSuggestedWords(newWords, mSuggestedWords)) {
mCorrectionType = CORRECTIONTYPE_TYPO;
} else {
mCorrectionType = CORRECTIONTYPE_DIFFERENT_WORD;
@ -206,38 +224,71 @@ public class LogUnit {
// Mark it as a generic correction.
mCorrectionType = CORRECTIONTYPE_CORRECTION;
}
} else {
mCorrectionType = CORRECTIONTYPE_NO_CORRECTION;
}
mWords = newWords;
// Update mWordArray
mWordArray = (TextUtils.isEmpty(mWords)) ? EMPTY_STRING_ARRAY
: WHITESPACE_PATTERN.split(mWords);
if (mWordArray.length > 0 && TextUtils.isEmpty(mWordArray[0])) {
// Empty string at beginning of array. Must have been whitespace at the start of the
// word. Remove the empty string.
mWordArray = Arrays.copyOfRange(mWordArray, 1, mWordArray.length);
}
mWord = word;
}
public String getWord() {
return mWord;
public String getWordsAsString() {
return mWords;
}
public boolean hasWord() {
return mWord != null && !TextUtils.isEmpty(mWord.trim());
/**
* Returns the words generated by the data in this LogUnit.
*
* {@code setWords()} drops the empty leading element that splitting produces when the text
* starts with whitespace, so the first word is not expected to be an empty string.
*
* @return the array of words, or an empty array if there are no words associated with this LogUnit.
*/
public String[] getWordsAsStringArray() {
return mWordArray;
}
public boolean hasOneOrMoreWords() {
return mWordArray.length >= 1;
}
public int getNumWords() {
return mWordArray.length;
}
// TODO: Refactor to eliminate getter/setters
public void setMayContainDigit() {
mMayContainDigit = true;
}
// TODO: Refactor to eliminate getter/setters
public boolean mayContainDigit() {
return mMayContainDigit;
}
// TODO: Refactor to eliminate getter/setters
public void setContainsCorrection() {
mContainsCorrection = true;
}
// TODO: Refactor to eliminate getter/setters
public boolean containsCorrection() {
return mContainsCorrection;
}
// TODO: Refactor to eliminate getter/setters
public void setCorrectionType(final int correctionType) {
mCorrectionType = correctionType;
}
// TODO: Refactor to eliminate getter/setters
public int getCorrectionType() {
return mCorrectionType;
}
@ -267,7 +318,7 @@ public class LogUnit {
new ArrayList<Object[]>(laterValues),
new ArrayList<Long>(laterTimes),
true /* isPartOfMegaword */);
newLogUnit.mWord = null;
newLogUnit.mWords = null;
newLogUnit.mMayContainDigit = mMayContainDigit;
newLogUnit.mContainsCorrection = mContainsCorrection;
@ -287,9 +338,9 @@ public class LogUnit {
mLogStatementList.addAll(logUnit.mLogStatementList);
mValuesList.addAll(logUnit.mValuesList);
mTimeList.addAll(logUnit.mTimeList);
mWord = null;
if (logUnit.mWord != null) {
setWord(logUnit.mWord);
mWords = null;
if (logUnit.mWords != null) {
setWords(logUnit.mWords);
}
mMayContainDigit = mMayContainDigit || logUnit.mMayContainDigit;
mContainsCorrection = mContainsCorrection || logUnit.mContainsCorrection;

View File

@ -126,10 +126,7 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word != null) {
numWordsInLogUnitList++;
}
numWordsInLogUnitList += logUnit.getNumWords();
}
return numWordsInLogUnitList >= minNGramSize;
}
@ -153,29 +150,31 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
// the complete buffer contents in detail.
int numWordsInLogUnitList = 0;
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
if (!logUnit.hasWord()) {
for (final LogUnit logUnit : logUnits) {
if (!logUnit.hasOneOrMoreWords()) {
// Digits outside words are a privacy threat.
if (logUnit.mayContainDigit()) {
return false;
}
} else {
numWordsInLogUnitList++;
final String word = logUnit.getWord();
// Words not in the dictionary are a privacy threat.
if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
if (DEBUG) {
Log.d(TAG, "NOT SAFE!: hasLetters: " + ResearchLogger.hasLetters(word)
+ ", isValid: " + (dictionary.isValidWord(word)));
numWordsInLogUnitList += logUnit.getNumWords();
final String[] words = logUnit.getWordsAsStringArray();
for (final String word : words) {
// Words not in the dictionary are a privacy threat.
if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
if (DEBUG) {
Log.d(TAG, "\"" + word + "\" NOT SAFE!: hasLetters: "
+ ResearchLogger.hasLetters(word)
+ ", isValid: " + (dictionary.isValidWord(word)));
}
return false;
}
return false;
}
}
}
// Finally, only return true if the minNGramSize is met.
return numWordsInLogUnitList >= minNGramSize;
// Finally, only return true if the ngram is the right size.
return numWordsInLogUnitList == minNGramSize;
}
public void shiftAndPublishAll() {
@ -198,11 +197,14 @@ public abstract class MainLogBuffer extends FixedLogBuffer {
shiftOutWords(N_GRAM_SIZE);
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
} else {
// No good n-gram at front, and buffer is full. Shift out the first word (or if there
// is none, the existing logUnits).
logUnits = peekAtFirstNWords(1);
// No good n-gram at front, and buffer is full. Shift out up through the first logUnit
// with associated words (or if there is none, all the existing logUnits).
logUnits.clear();
for (LogUnit logUnit = shiftOut(); logUnit != null && !logUnit.hasOneOrMoreWords();
logUnit = shiftOut()) {
logUnits.add(logUnit);
}
publish(logUnits, false /* canIncludePrivateData */);
shiftOutWords(1);
}
}

View File

@ -397,13 +397,14 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
protected void publish(final ArrayList<LogUnit> logUnits,
boolean canIncludePrivateData) {
canIncludePrivateData |= IS_LOGGING_EVERYTHING;
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word != null && word.length() > 0 && hasLetters(word)) {
Log.d(TAG, "onPublish: " + word + ", hc: "
+ logUnit.containsCorrection());
for (final LogUnit logUnit : logUnits) {
if (DEBUG) {
final String wordsString = logUnit.getWordsAsString();
Log.d(TAG, "onPublish: '" + wordsString
+ "', hc: " + logUnit.containsCorrection()
+ ", cipd: " + canIncludePrivateData);
}
for (final String word : logUnit.getWordsAsStringArray()) {
final Dictionary dictionary = getDictionary();
mStatistics.recordWordEntered(
dictionary != null && dictionary.isValidWord(word),
@ -852,8 +853,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
/* package for test */ void commitCurrentLogUnit() {
if (DEBUG) {
Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasWord() ?
": " + mCurrentLogUnit.getWord() : ""));
Log.d(TAG, "commitCurrentLogUnit" + (mCurrentLogUnit.hasOneOrMoreWords() ?
": " + mCurrentLogUnit.getWordsAsString() : ""));
}
if (!mCurrentLogUnit.isEmpty()) {
if (mMainLogBuffer != null) {
@ -893,8 +894,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
// Check that expected word matches.
if (oldLogUnit != null) {
final String oldLogUnitWord = oldLogUnit.getWord();
if (oldLogUnitWord != null && !oldLogUnitWord.equals(expectedWord)) {
final String oldLogUnitWords = oldLogUnit.getWordsAsString();
if (oldLogUnitWords != null && !oldLogUnitWords.equals(expectedWord)) {
return;
}
}
@ -916,7 +917,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
enqueueEvent(LOGSTATEMENT_UNCOMMIT_CURRENT_LOGUNIT);
if (DEBUG) {
Log.d(TAG, "uncommitCurrentLogUnit (dump=" + dumpCurrentLogUnit + ") back to "
+ (mCurrentLogUnit.hasWord() ? ": '" + mCurrentLogUnit.getWord() + "'" : ""));
+ (mCurrentLogUnit.hasOneOrMoreWords() ? ": '"
+ mCurrentLogUnit.getWordsAsString() + "'" : ""));
}
}
@ -950,8 +952,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
}
for (LogUnit logUnit : logUnits) {
if (DEBUG) {
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
: "<wordless>") + ", correction?: " + logUnit.containsCorrection());
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasOneOrMoreWords()
? logUnit.getWordsAsString() : "<wordless>")
+ ", correction?: " + logUnit.containsCorrection());
}
researchLog.publish(logUnit, canIncludePrivateData);
}
@ -986,7 +989,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
return;
}
if (word.length() > 0 && hasLetters(word)) {
mCurrentLogUnit.setWord(word);
mCurrentLogUnit.setWords(word);
}
final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
enqueueCommitText(word, isBatchMode);
@ -1478,7 +1481,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
}
if (originallyTypedWord.length() > 0 && hasLetters(originallyTypedWord)) {
if (logUnit != null) {
logUnit.setWord(originallyTypedWord);
logUnit.setWords(originallyTypedWord);
}
}
researchLogger.enqueueEvent(logUnit != null ? logUnit : researchLogger.mCurrentLogUnit,
@ -1616,7 +1619,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
* Log a call to LatinIME.commitCurrentAutoCorrection().
*
* SystemResponse: The IME has committed an auto-correction. An auto-correction changes the raw
* text input to another word that the user more likely desired to type.
* text input to another word (or words) that the user more likely desired to type.
*/
private static final LogStatement LOGSTATEMENT_LATINIME_COMMITCURRENTAUTOCORRECTION =
new LogStatement("LatinIMECommitCurrentAutoCorrection", true, true, "typedWord",
@ -1827,7 +1830,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
final int enteredWordPos, final SuggestedWords suggestedWords) {
final ResearchLogger researchLogger = getInstance();
if (!TextUtils.isEmpty(enteredText) && hasLetters(enteredText.toString())) {
researchLogger.mCurrentLogUnit.setWord(enteredText.toString());
researchLogger.mCurrentLogUnit.setWords(enteredText.toString());
}
researchLogger.enqueueEvent(LOGSTATEMENT_LATINIME_ONENDBATCHINPUT, enteredText,
enteredWordPos);