[Rlog78b] Make log privacy filtering decisions on n-grams

Previously, words were pushed out of a LogBuffer one at a time.  The receiving code had to keep
state to know whether an n-gram was safe to log.  This patch looks at the entire n-gram and makes a
single decision based on it alone.

multi-project commit with I3c40d7e02c77943d2668094ddb1d03efb942c74f

Change-Id: Id7d90bbd551b1a2f4e0e35f38852652f68f273f8
main
Kurt Partridge 2013-01-11 16:49:54 -08:00
parent 345ef67627
commit 80685aa4b9
5 changed files with 220 additions and 123 deletions

View File

@ -16,6 +16,7 @@
package com.android.inputmethod.research; package com.android.inputmethod.research;
import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
/** /**
@ -65,8 +66,13 @@ public class FixedLogBuffer extends LogBuffer {
super.shiftIn(newLogUnit); super.shiftIn(newLogUnit);
return; return;
} }
if (mNumActualWords == mWordCapacity) { if (mNumActualWords >= mWordCapacity) {
shiftOutThroughFirstWord(); // Give subclass a chance to handle the buffer full condition by shifting out logUnits.
onBufferFull();
// If still full, evict.
if (mNumActualWords >= mWordCapacity) {
shiftOutWords(1);
}
} }
super.shiftIn(newLogUnit); super.shiftIn(newLogUnit);
mNumActualWords++; // Must be a word, or we wouldn't be here. mNumActualWords++; // Must be a word, or we wouldn't be here.
@ -81,18 +87,8 @@ public class FixedLogBuffer extends LogBuffer {
return logUnit; return logUnit;
} }
public void shiftOutThroughFirstWord() { public int getNumWords() {
final LinkedList<LogUnit> logUnits = getLogUnits(); return mNumActualWords;
while (!logUnits.isEmpty()) {
final LogUnit logUnit = logUnits.removeFirst();
onShiftOut(logUnit);
if (logUnit.hasWord()) {
// Successfully shifted out a word-containing LogUnit and made space for the new
// LogUnit.
mNumActualWords--;
break;
}
}
} }
/** /**
@ -105,28 +101,63 @@ public class FixedLogBuffer extends LogBuffer {
} }
/** /**
* Called when a LogUnit is removed from the LogBuffer as a result of a shiftIn. LogUnits are * Called when the buffer has just shifted in one more word than its maximum, and its about to
* removed in the order entered. This method is not called when shiftOut is called directly. * shift out LogUnits to bring it back down to the maximum.
* *
* Base class does nothing; subclasses may override if they want to record non-privacy sensitive * Base class does nothing; subclasses may override if they want to record non-privacy sensitive
* events that fall off the end. * events that fall off the end.
*/ */
protected void onShiftOut(final LogUnit logUnit) { protected void onBufferFull() {
} }
/**
* Called to deliberately remove the oldest LogUnit. Usually called when draining the
* LogBuffer.
*/
@Override @Override
public LogUnit shiftOut() { public LogUnit shiftOut() {
if (isEmpty()) {
return null;
}
final LogUnit logUnit = super.shiftOut(); final LogUnit logUnit = super.shiftOut();
if (logUnit.hasWord()) { if (logUnit != null && logUnit.hasWord()) {
mNumActualWords--; mNumActualWords--;
} }
return logUnit; return logUnit;
} }
/**
 * Shifts LogUnits out of the front of the buffer until the word count has dropped by
 * {@code numWords}, or until the buffer is empty, whichever comes first.
 *
 * The word count itself is maintained by {@link #shiftOut()}; this method only watches it.
 *
 * @param numWords The number of word-bearing LogUnits to remove.
 */
protected void shiftOutWords(final int numWords) {
    final int wordsToKeep = mNumActualWords - numWords;
    final LinkedList<LogUnit> pending = getLogUnits();
    while (true) {
        // Stop once enough words are gone, or when there is nothing left to remove.
        if (mNumActualWords <= wordsToKeep || pending.isEmpty()) {
            return;
        }
        shiftOut();
    }
}
/**
 * Drains the buffer by shifting out every LogUnit it holds, then zeroes the word count.
 */
public void shiftOutAll() {
    for (final LinkedList<LogUnit> pending = getLogUnits(); !pending.isEmpty(); ) {
        shiftOut();
    }
    // Nothing is left in the buffer, so no words are left either.
    mNumActualWords = 0;
}
/**
 * Returns the {@link LogUnit}s at the front of the buffer, up to and including the one that
 * carries the {@code n}-th word. No more than {@code n} of the returned LogUnits will have
 * words associated with them. If the buffer holds fewer than {@code n} words, all of its
 * LogUnits are returned. The buffer itself is not modified.
 *
 * @param n The maximum number of {@link LogUnit}s with words to return. Values of zero or
 *     less yield an empty list.
 * @return The list of the {@link LogUnit}s containing the first n words
 */
public ArrayList<LogUnit> peekAtFirstNWords(int n) {
    final LinkedList<LogUnit> logUnits = getLogUnits();
    final int wordLimit = Math.max(n, 0);
    // Allocate space for n*2 logUnits. There will be at least n, one for each word, and
    // there may be additional for punctuation, between-word commands, etc. The hint is
    // computed in long arithmetic and capped at the buffer size so that a very large or
    // negative n cannot produce an invalid capacity.
    final int capacityHint = (int) Math.min((long) logUnits.size(), 2L * wordLimit);
    final ArrayList<LogUnit> list = new ArrayList<LogUnit>(capacityHint);
    int wordsRemaining = wordLimit;
    // Iterate rather than index: LinkedList.get(i) walks the list on every call.
    for (final LogUnit logUnit : logUnits) {
        if (wordsRemaining <= 0) {
            break;
        }
        list.add(logUnit);
        if (logUnit.getWord() != null) {
            wordsRemaining--;
        }
    }
    return list;
}
} }

View File

@ -98,7 +98,7 @@ import java.util.Map;
* Publish the contents of this LogUnit to researchLog. * Publish the contents of this LogUnit to researchLog.
*/ */
public synchronized void publishTo(final ResearchLog researchLog, public synchronized void publishTo(final ResearchLog researchLog,
final boolean isIncludingPrivateData) { final boolean canIncludePrivateData) {
// Prepare debugging output if necessary // Prepare debugging output if necessary
final StringWriter debugStringWriter; final StringWriter debugStringWriter;
final JsonWriter debugJsonWriter; final JsonWriter debugJsonWriter;
@ -123,7 +123,7 @@ import java.util.Map;
JsonWriter jsonWriter = null; JsonWriter jsonWriter = null;
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
final LogStatement logStatement = mLogStatementList.get(i); final LogStatement logStatement = mLogStatementList.get(i);
if (!isIncludingPrivateData && logStatement.mIsPotentiallyPrivate) { if (!canIncludePrivateData && logStatement.mIsPotentiallyPrivate) {
continue; continue;
} }
if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) { if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) {
@ -134,7 +134,7 @@ import java.util.Map;
// will not have been opened for writing. // will not have been opened for writing.
if (jsonWriter == null) { if (jsonWriter == null) {
jsonWriter = researchLog.getValidJsonWriterLocked(); jsonWriter = researchLog.getValidJsonWriterLocked();
outputLogUnitStart(jsonWriter, isIncludingPrivateData); outputLogUnitStart(jsonWriter, canIncludePrivateData);
} }
outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i), outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i),
mTimeList.get(i)); mTimeList.get(i));
@ -145,7 +145,7 @@ import java.util.Map;
} }
if (jsonWriter != null) { if (jsonWriter != null) {
// We must have called logUnitStart earlier, so emit a logUnitStop. // We must have called logUnitStart earlier, so emit a logUnitStop.
outputLogUnitStop(jsonWriter, isIncludingPrivateData); outputLogUnitStop(jsonWriter);
} }
} }
if (DEBUG) { if (DEBUG) {
@ -171,11 +171,11 @@ import java.util.Map;
private static final String LOG_UNIT_END_KEY = "logUnitEnd"; private static final String LOG_UNIT_END_KEY = "logUnitEnd";
private void outputLogUnitStart(final JsonWriter jsonWriter, private void outputLogUnitStart(final JsonWriter jsonWriter,
final boolean isIncludingPrivateData) { final boolean canIncludePrivateData) {
try { try {
jsonWriter.beginObject(); jsonWriter.beginObject();
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis()); jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
if (isIncludingPrivateData) { if (canIncludePrivateData) {
jsonWriter.name(WORD_KEY).value(getWord()); jsonWriter.name(WORD_KEY).value(getWord());
} }
jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY); jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
@ -186,8 +186,7 @@ import java.util.Map;
} }
} }
private void outputLogUnitStop(final JsonWriter jsonWriter, private void outputLogUnitStop(final JsonWriter jsonWriter) {
final boolean isIncludingPrivateData) {
try { try {
jsonWriter.beginObject(); jsonWriter.beginObject();
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis()); jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());

View File

@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.Suggest; import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.define.ProductionFlag; import com.android.inputmethod.latin.define.ProductionFlag;
import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Random; import java.util.Random;
@ -56,19 +57,24 @@ import java.util.Random;
* If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded * If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
* n-gram containing dictionary words. * n-gram containing dictionary words.
*/ */
public class MainLogBuffer extends FixedLogBuffer { public abstract class MainLogBuffer extends FixedLogBuffer {
private static final String TAG = MainLogBuffer.class.getSimpleName(); private static final String TAG = MainLogBuffer.class.getSimpleName();
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG; private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams. // The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
public static final int N_GRAM_SIZE = 2; public static final int N_GRAM_SIZE = 2;
// The number of words between n-grams to omit from the log. If debugging, record 50% of all
// words. Otherwise, only record 10%.
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
ProductionFlag.IS_EXPERIMENTAL_DEBUG ? 2 : 18;
private final ResearchLog mResearchLog; // Whether all words should be recorded, leaving unsampled word between bigrams. Useful for
// testing.
/* package for test */ static final boolean IS_LOGGING_EVERYTHING = false
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// The number of words between n-grams to omit from the log.
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
IS_LOGGING_EVERYTHING ? 0 : (DEBUG ? 2 : 18);
private Suggest mSuggest; private Suggest mSuggest;
private boolean mIsStopping = false;
/* package for test */ int mNumWordsBetweenNGrams; /* package for test */ int mNumWordsBetweenNGrams;
@ -76,9 +82,8 @@ public class MainLogBuffer extends FixedLogBuffer {
// after a sample is taken. // after a sample is taken.
/* package for test */ int mNumWordsUntilSafeToSample; /* package for test */ int mNumWordsUntilSafeToSample;
public MainLogBuffer(final ResearchLog researchLog) { public MainLogBuffer() {
super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES); super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
mResearchLog = researchLog;
mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES; mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
final Random random = new Random(); final Random random = new Random();
mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1); mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
@ -92,6 +97,10 @@ public class MainLogBuffer extends FixedLogBuffer {
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams; mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
} }
/**
 * Marks this buffer as being shut down. The flag is only ever set, never cleared, here.
 */
public void setIsStopping() {
    this.mIsStopping = true;
}
/** /**
* Determines whether uploading the n words at the front the MainLogBuffer will not violate * Determines whether uploading the n words at the front the MainLogBuffer will not violate
* user privacy. * user privacy.
@ -103,16 +112,36 @@ public class MainLogBuffer extends FixedLogBuffer {
* the screen orientation and other characteristics about the device can be uploaded without * the screen orientation and other characteristics about the device can be uploaded without
* revealing much about the user. * revealing much about the user.
*/ */
public boolean isNGramSafe() { private boolean isSafeNGram(final ArrayList<LogUnit> logUnits, final int minNGramSize) {
// Bypass privacy checks when debugging.
if (IS_LOGGING_EVERYTHING) {
if (mIsStopping) {
return true;
} else {
// Only check that it is the right length. If not, wait for later words to make
// complete n-grams.
int numWordsInLogUnitList = 0;
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word != null) {
numWordsInLogUnitList++;
}
}
return numWordsInLogUnitList >= minNGramSize;
}
}
// Check that we are not sampling too frequently. Having sampled recently might disclose // Check that we are not sampling too frequently. Having sampled recently might disclose
// too much of the user's intended meaning. // too much of the user's intended meaning.
if (mNumWordsUntilSafeToSample > 0) { if (mNumWordsUntilSafeToSample > 0) {
return false; return false;
} }
if (mSuggest == null || !mSuggest.hasMainDictionary()) { if (mSuggest == null || !mSuggest.hasMainDictionary()) {
// Main dictionary is unavailable. Since we cannot check it, we cannot tell if a word // Main dictionary is unavailable. Since we cannot check it, we cannot tell if a
// is out-of-vocabulary or not. Therefore, we must judge the entire buffer contents to // word is out-of-vocabulary or not. Therefore, we must judge the entire buffer
// potentially pose a privacy risk. // contents to potentially pose a privacy risk.
return false; return false;
} }
// Reload the dictionary in case it has changed (e.g., because the user has changed // Reload the dictionary in case it has changed (e.g., because the user has changed
@ -121,12 +150,12 @@ public class MainLogBuffer extends FixedLogBuffer {
if (dictionary == null) { if (dictionary == null) {
return false; return false;
} }
// Check each word in the buffer. If any word poses a privacy threat, we cannot upload the
// complete buffer contents in detail. // Check each word in the buffer. If any word poses a privacy threat, we cannot upload
final LinkedList<LogUnit> logUnits = getLogUnits(); // the complete buffer contents in detail.
int numWordsInLogUnitList = 0;
final int length = logUnits.size(); final int length = logUnits.size();
int wordsNeeded = N_GRAM_SIZE; for (int i = 0; i < length; i++) {
for (int i = 0; i < length && wordsNeeded > 0; i++) {
final LogUnit logUnit = logUnits.get(i); final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord(); final String word = logUnit.getWord();
if (word == null) { if (word == null) {
@ -135,6 +164,7 @@ public class MainLogBuffer extends FixedLogBuffer {
return false; return false;
} }
} else { } else {
numWordsInLogUnitList++;
// Words not in the dictionary are a privacy threat. // Words not in the dictionary are a privacy threat.
if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) { if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
if (DEBUG) { if (DEBUG) {
@ -145,38 +175,59 @@ public class MainLogBuffer extends FixedLogBuffer {
} }
} }
} }
// All checks have passed; this buffer's content can be safely uploaded.
return true; // Finally, only return true if the minNGramSize is met.
return numWordsInLogUnitList >= minNGramSize;
} }
public boolean isNGramComplete() { public void shiftAndPublishAll() {
final LinkedList<LogUnit> logUnits = getLogUnits(); final LinkedList<LogUnit> logUnits = getLogUnits();
final int length = logUnits.size(); while (!logUnits.isEmpty()) {
int wordsNeeded = N_GRAM_SIZE; publishLogUnitsAtFrontOfBuffer();
for (int i = 0; i < length && wordsNeeded > 0; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word != null) {
wordsNeeded--;
}
} }
return wordsNeeded == 0;
} }
@Override @Override
protected void onShiftOut(final LogUnit logUnit) { protected final void onBufferFull() {
if (mResearchLog != null) { publishLogUnitsAtFrontOfBuffer();
mResearchLog.publish(logUnit, }
ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
} protected final void publishLogUnitsAtFrontOfBuffer() {
if (logUnit.hasWord()) { ArrayList<LogUnit> logUnits = peekAtFirstNWords(N_GRAM_SIZE);
if (mNumWordsUntilSafeToSample > 0) { if (isSafeNGram(logUnits, N_GRAM_SIZE)) {
mNumWordsUntilSafeToSample--; // Good n-gram at the front of the buffer. Publish it, disclosing details.
Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample); publish(logUnits, true /* canIncludePrivateData */);
} shiftOutWords(N_GRAM_SIZE);
resetWordCounter();
} else {
// No good n-gram at front, and buffer is full. Shift out the first word (or if there
// is none, the existing logUnits).
logUnits = peekAtFirstNWords(1);
publish(logUnits, false /* canIncludePrivateData */);
shiftOutWords(1);
} }
}
/**
* Called when a list of logUnits should be published.
*
* It is the subclass's responsibility to implement the publication. This class does not write
* the logUnits anywhere itself; it only decides which logUnits to hand over and whether their
* private data may be disclosed.
*
* @param logUnits The list of logUnits to be published.
* @param canIncludePrivateData Whether the private data in the logUnits can be included in
* publication.
*/
protected abstract void publish(final ArrayList<LogUnit> logUnits,
final boolean canIncludePrivateData);
@Override
protected void shiftOutWords(int numWords) {
int oldNumActualWords = getNumActualWords();
super.shiftOutWords(numWords);
int numWordsShifted = oldNumActualWords - getNumActualWords();
mNumWordsUntilSafeToSample -= numWordsShifted;
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : "")); Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
} }
} }
} }

View File

@ -185,12 +185,12 @@ public class ResearchLog {
mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS); mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS);
} }
public synchronized void publish(final LogUnit logUnit, final boolean isIncludingPrivateData) { public synchronized void publish(final LogUnit logUnit, final boolean canIncludePrivateData) {
try { try {
mExecutor.submit(new Callable<Object>() { mExecutor.submit(new Callable<Object>() {
@Override @Override
public Object call() throws Exception { public Object call() throws Exception {
logUnit.publishTo(ResearchLog.this, isIncludingPrivateData); logUnit.publishTo(ResearchLog.this, canIncludePrivateData);
scheduleFlush(); scheduleFlush();
return null; return null;
} }

View File

@ -69,7 +69,9 @@ import com.android.inputmethod.latin.define.ProductionFlag;
import java.io.File; import java.io.File;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.UUID; import java.util.UUID;
@ -84,9 +86,6 @@ import java.util.UUID;
public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChangeListener { public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChangeListener {
private static final String TAG = ResearchLogger.class.getSimpleName(); private static final String TAG = ResearchLogger.class.getSimpleName();
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG; private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// Whether all n-grams should be logged. true will disclose private info.
public static final boolean IS_LOGGING_EVERYTHING = false
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
// Whether the TextView contents are logged at the end of the session. true will disclose // Whether the TextView contents are logged at the end of the session. true will disclose
// private info. // private info.
private static final boolean LOG_FULL_TEXTVIEW_CONTENTS = false private static final boolean LOG_FULL_TEXTVIEW_CONTENTS = false
@ -105,7 +104,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
private static final boolean IS_SHOWING_INDICATOR = true; private static final boolean IS_SHOWING_INDICATOR = true;
// Change the default indicator to something very visible. Currently two red vertical bars on // Change the default indicator to something very visible. Currently two red vertical bars on
// either side of they keyboard. // either side of they keyboard.
private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false || IS_LOGGING_EVERYTHING; private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false ||
(MainLogBuffer.IS_LOGGING_EVERYTHING && ProductionFlag.IS_EXPERIMENTAL_DEBUG);
// FEEDBACK_WORD_BUFFER_SIZE should add 1 because it must also hold the feedback LogUnit itself. // FEEDBACK_WORD_BUFFER_SIZE should add 1 because it must also hold the feedback LogUnit itself.
public static final int FEEDBACK_WORD_BUFFER_SIZE = (Integer.MAX_VALUE - 1) + 1; public static final int FEEDBACK_WORD_BUFFER_SIZE = (Integer.MAX_VALUE - 1) + 1;
@ -387,15 +387,41 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
} }
if (mMainLogBuffer == null) { if (mMainLogBuffer == null) {
mMainResearchLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME); mMainResearchLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
mMainLogBuffer = new MainLogBuffer(mMainResearchLog); mMainLogBuffer = new MainLogBuffer() {
@Override
protected void publish(final ArrayList<LogUnit> logUnits,
boolean canIncludePrivateData) {
canIncludePrivateData |= MainLogBuffer.IS_LOGGING_EVERYTHING;
final int length = logUnits.size();
for (int i = 0; i < length; i++) {
final LogUnit logUnit = logUnits.get(i);
final String word = logUnit.getWord();
if (word != null && word.length() > 0 && hasLetters(word)) {
Log.d(TAG, "onPublish: " + word + ", hc: "
+ logUnit.containsCorrection());
final Dictionary dictionary = getDictionary();
mStatistics.recordWordEntered(
dictionary != null && dictionary.isValidWord(word),
logUnit.containsCorrection());
}
}
if (mMainResearchLog != null) {
publishLogUnits(logUnits, mMainResearchLog, canIncludePrivateData);
}
}
};
mMainLogBuffer.setSuggest(mSuggest); mMainLogBuffer.setSuggest(mSuggest);
} }
if (mFeedbackLogBuffer == null) { if (mFeedbackLogBuffer == null) {
mFeedbackLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME); resetFeedbackLogging();
mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
} }
} }
/**
 * Replaces the feedback log and its buffer with fresh instances: a new ResearchLog backed by
 * a newly created log file, and a new FixedLogBuffer of FEEDBACK_WORD_BUFFER_SIZE words.
 */
private void resetFeedbackLogging() {
    this.mFeedbackLog = new ResearchLog(createLogFile(this.mFilesDir), this.mLatinIME);
    this.mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
}
/* package */ void stop() { /* package */ void stop() {
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "stop called"); Log.d(TAG, "stop called");
@ -404,16 +430,11 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
commitCurrentLogUnit(); commitCurrentLogUnit();
if (mMainLogBuffer != null) { if (mMainLogBuffer != null) {
while (!mMainLogBuffer.isEmpty()) { mMainLogBuffer.shiftAndPublishAll();
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) && logStatistics();
mMainResearchLog != null) { commitCurrentLogUnit();
publishLogBuffer(mMainLogBuffer, mMainResearchLog, mMainLogBuffer.setIsStopping();
true /* isIncludingPrivateData */); mMainLogBuffer.shiftAndPublishAll();
mMainLogBuffer.resetWordCounter();
} else {
mMainLogBuffer.shiftOutThroughFirstWord();
}
}
mMainResearchLog.close(null /* callback */); mMainResearchLog.close(null /* callback */);
mMainLogBuffer = null; mMainLogBuffer = null;
} }
@ -731,13 +752,6 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
} }
if (!mCurrentLogUnit.isEmpty()) { if (!mCurrentLogUnit.isEmpty()) {
if (mMainLogBuffer != null) { if (mMainLogBuffer != null) {
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
mMainLogBuffer.isNGramComplete() &&
mMainResearchLog != null) {
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
true /* isIncludingPrivateData */);
mMainLogBuffer.resetWordCounter();
}
mMainLogBuffer.shiftIn(mCurrentLogUnit); mMainLogBuffer.shiftIn(mCurrentLogUnit);
} }
if (mFeedbackLogBuffer != null) { if (mFeedbackLogBuffer != null) {
@ -798,33 +812,39 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
} }
} }
/* package for test */ void publishLogBuffer(final LogBuffer logBuffer,
final ResearchLog researchLog, final boolean isIncludingPrivateData) {
publishLogUnits(logBuffer.getLogUnits(), researchLog, isIncludingPrivateData);
}
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_OPENING = private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_OPENING =
new LogStatement("logSegmentStart", false, false, "isIncludingPrivateData"); new LogStatement("logSegmentStart", false, false, "isIncludingPrivateData");
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_CLOSING = private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_CLOSING =
new LogStatement("logSegmentEnd", false, false); new LogStatement("logSegmentEnd", false, false);
/* package for test */ void publishLogBuffer(final LogBuffer logBuffer, /* package for test */ void publishLogUnits(final List<LogUnit> logUnits,
final ResearchLog researchLog, final boolean isIncludingPrivateData) { final ResearchLog researchLog, final boolean canIncludePrivateData) {
final LogUnit openingLogUnit = new LogUnit(); final LogUnit openingLogUnit = new LogUnit();
if (logBuffer.isEmpty()) return; if (logUnits.isEmpty()) return;
openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING, SystemClock.uptimeMillis(), // LogUnits not containing private data, such as contextual data for the log, do not require
isIncludingPrivateData); // logSegment boundary statements.
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */); if (canIncludePrivateData) {
LogUnit logUnit; openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING,
int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE; SystemClock.uptimeMillis(), canIncludePrivateData);
while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) { researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
}
for (LogUnit logUnit : logUnits) {
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord() Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
: "<wordless>")); : "<wordless>") + ", correction?: " + logUnit.containsCorrection());
}
researchLog.publish(logUnit, isIncludingPrivateData);
if (logUnit.getWord() != null) {
numWordsToPublish--;
} }
researchLog.publish(logUnit, canIncludePrivateData);
}
if (canIncludePrivateData) {
final LogUnit closingLogUnit = new LogUnit();
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
SystemClock.uptimeMillis());
researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
} }
final LogUnit closingLogUnit = new LogUnit();
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
SystemClock.uptimeMillis());
researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
} }
public static boolean hasLetters(final String word) { public static boolean hasLetters(final String word) {
@ -849,12 +869,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
if (word == null) { if (word == null) {
return; return;
} }
final Dictionary dictionary = getDictionary();
if (word.length() > 0 && hasLetters(word)) { if (word.length() > 0 && hasLetters(word)) {
mCurrentLogUnit.setWord(word); mCurrentLogUnit.setWord(word);
final boolean isDictionaryWord = dictionary != null
&& dictionary.isValidWord(word);
mStatistics.recordWordEntered(isDictionaryWord, mCurrentLogUnit.containsCorrection());
} }
final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime); final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
enqueueCommitText(word, isBatchMode); enqueueCommitText(word, isBatchMode);
@ -967,7 +983,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
Integer.toHexString(editorInfo.inputType), Integer.toHexString(editorInfo.inputType),
Integer.toHexString(editorInfo.imeOptions), editorInfo.fieldId, Integer.toHexString(editorInfo.imeOptions), editorInfo.fieldId,
Build.DISPLAY, Build.MODEL, prefs, versionCode, versionName, Build.DISPLAY, Build.MODEL, prefs, versionCode, versionName,
OUTPUT_FORMAT_VERSION, IS_LOGGING_EVERYTHING, OUTPUT_FORMAT_VERSION, MainLogBuffer.IS_LOGGING_EVERYTHING,
ProductionFlag.IS_EXPERIMENTAL_DEBUG); ProductionFlag.IS_EXPERIMENTAL_DEBUG);
} catch (NameNotFoundException e) { } catch (NameNotFoundException e) {
e.printStackTrace(); e.printStackTrace();
@ -976,7 +992,6 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
} }
public void latinIME_onFinishInputViewInternal() { public void latinIME_onFinishInputViewInternal() {
logStatistics();
stop(); stop();
} }
@ -1524,6 +1539,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
public static void richInputConnection_commitText(final String committedWord, public static void richInputConnection_commitText(final String committedWord,
final int newCursorPosition, final boolean isBatchMode) { final int newCursorPosition, final boolean isBatchMode) {
final ResearchLogger researchLogger = getInstance(); final ResearchLogger researchLogger = getInstance();
// Only include opening and closing logSegments if private data is included
final String scrubbedWord = scrubDigitsFromString(committedWord); final String scrubbedWord = scrubDigitsFromString(committedWord);
if (!researchLogger.isExpectingCommitText) { if (!researchLogger.isExpectingCommitText) {
researchLogger.enqueueEvent(LOGSTATEMENT_RICHINPUTCONNECTIONCOMMITTEXT, researchLogger.enqueueEvent(LOGSTATEMENT_RICHINPUTCONNECTIONCOMMITTEXT,