am 8b788374
: Merge "[Rlog78b] Make log privacy filtering decisions on n-grams"
* commit '8b788374dee56dfe95e7af42a358923cfcb3668e': [Rlog78b] Make log privacy filtering decisions on n-grams
This commit is contained in:
commit
2464cd7f2d
5 changed files with 220 additions and 123 deletions
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
package com.android.inputmethod.research;
|
package com.android.inputmethod.research;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -65,8 +66,13 @@ public class FixedLogBuffer extends LogBuffer {
|
||||||
super.shiftIn(newLogUnit);
|
super.shiftIn(newLogUnit);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (mNumActualWords == mWordCapacity) {
|
if (mNumActualWords >= mWordCapacity) {
|
||||||
shiftOutThroughFirstWord();
|
// Give subclass a chance to handle the buffer full condition by shifting out logUnits.
|
||||||
|
onBufferFull();
|
||||||
|
// If still full, evict.
|
||||||
|
if (mNumActualWords >= mWordCapacity) {
|
||||||
|
shiftOutWords(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
super.shiftIn(newLogUnit);
|
super.shiftIn(newLogUnit);
|
||||||
mNumActualWords++; // Must be a word, or we wouldn't be here.
|
mNumActualWords++; // Must be a word, or we wouldn't be here.
|
||||||
|
@ -81,18 +87,8 @@ public class FixedLogBuffer extends LogBuffer {
|
||||||
return logUnit;
|
return logUnit;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void shiftOutThroughFirstWord() {
|
public int getNumWords() {
|
||||||
final LinkedList<LogUnit> logUnits = getLogUnits();
|
return mNumActualWords;
|
||||||
while (!logUnits.isEmpty()) {
|
|
||||||
final LogUnit logUnit = logUnits.removeFirst();
|
|
||||||
onShiftOut(logUnit);
|
|
||||||
if (logUnit.hasWord()) {
|
|
||||||
// Successfully shifted out a word-containing LogUnit and made space for the new
|
|
||||||
// LogUnit.
|
|
||||||
mNumActualWords--;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -105,28 +101,63 @@ public class FixedLogBuffer extends LogBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when a LogUnit is removed from the LogBuffer as a result of a shiftIn. LogUnits are
|
* Called when the buffer has just shifted in one more word than its maximum, and it's about to
|
||||||
* removed in the order entered. This method is not called when shiftOut is called directly.
|
* shift out LogUnits to bring it back down to the maximum.
|
||||||
*
|
*
|
||||||
* Base class does nothing; subclasses may override if they want to record non-privacy sensitive
|
* Base class does nothing; subclasses may override if they want to record non-privacy sensitive
|
||||||
* events that fall off the end.
|
* events that fall off the end.
|
||||||
*/
|
*/
|
||||||
protected void onShiftOut(final LogUnit logUnit) {
|
protected void onBufferFull() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Called to deliberately remove the oldest LogUnit. Usually called when draining the
|
|
||||||
* LogBuffer.
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public LogUnit shiftOut() {
|
public LogUnit shiftOut() {
|
||||||
if (isEmpty()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
final LogUnit logUnit = super.shiftOut();
|
final LogUnit logUnit = super.shiftOut();
|
||||||
if (logUnit.hasWord()) {
|
if (logUnit != null && logUnit.hasWord()) {
|
||||||
mNumActualWords--;
|
mNumActualWords--;
|
||||||
}
|
}
|
||||||
return logUnit;
|
return logUnit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void shiftOutWords(final int numWords) {
|
||||||
|
final int targetNumWords = mNumActualWords - numWords;
|
||||||
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
|
while (mNumActualWords > targetNumWords && !logUnits.isEmpty()) {
|
||||||
|
shiftOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void shiftOutAll() {
|
||||||
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
|
while (!logUnits.isEmpty()) {
|
||||||
|
shiftOut();
|
||||||
|
}
|
||||||
|
mNumActualWords = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of {@link LogUnit}s at the front of the buffer that have associated words. No
|
||||||
|
* more than {@code n} LogUnits will have words associated with them. If there are not enough
|
||||||
|
* LogUnits in the buffer to meet the word requirement, returns all the LogUnits.
|
||||||
|
*
|
||||||
|
* @param n The maximum number of {@link LogUnit}s with words to return.
|
||||||
|
* @return The list of the {@link LogUnit}s containing the first n words
|
||||||
|
*/
|
||||||
|
public ArrayList<LogUnit> peekAtFirstNWords(int n) {
|
||||||
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
|
final int length = logUnits.size();
|
||||||
|
// Allocate space for n*2 logUnits. There will be at least n, one for each word, and
|
||||||
|
// there may be additional for punctuation, between-word commands, etc. This should be
|
||||||
|
// enough that reallocation won't be necessary.
|
||||||
|
final ArrayList<LogUnit> list = new ArrayList<LogUnit>(n * 2);
|
||||||
|
for (int i = 0; i < length && n > 0; i++) {
|
||||||
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
|
list.add(logUnit);
|
||||||
|
final String word = logUnit.getWord();
|
||||||
|
if (word != null) {
|
||||||
|
n--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,7 +98,7 @@ import java.util.Map;
|
||||||
* Publish the contents of this LogUnit to researchLog.
|
* Publish the contents of this LogUnit to researchLog.
|
||||||
*/
|
*/
|
||||||
public synchronized void publishTo(final ResearchLog researchLog,
|
public synchronized void publishTo(final ResearchLog researchLog,
|
||||||
final boolean isIncludingPrivateData) {
|
final boolean canIncludePrivateData) {
|
||||||
// Prepare debugging output if necessary
|
// Prepare debugging output if necessary
|
||||||
final StringWriter debugStringWriter;
|
final StringWriter debugStringWriter;
|
||||||
final JsonWriter debugJsonWriter;
|
final JsonWriter debugJsonWriter;
|
||||||
|
@ -123,7 +123,7 @@ import java.util.Map;
|
||||||
JsonWriter jsonWriter = null;
|
JsonWriter jsonWriter = null;
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
final LogStatement logStatement = mLogStatementList.get(i);
|
final LogStatement logStatement = mLogStatementList.get(i);
|
||||||
if (!isIncludingPrivateData && logStatement.mIsPotentiallyPrivate) {
|
if (!canIncludePrivateData && logStatement.mIsPotentiallyPrivate) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) {
|
if (mIsPartOfMegaword && logStatement.mIsPotentiallyRevealing) {
|
||||||
|
@ -134,7 +134,7 @@ import java.util.Map;
|
||||||
// will not have been opened for writing.
|
// will not have been opened for writing.
|
||||||
if (jsonWriter == null) {
|
if (jsonWriter == null) {
|
||||||
jsonWriter = researchLog.getValidJsonWriterLocked();
|
jsonWriter = researchLog.getValidJsonWriterLocked();
|
||||||
outputLogUnitStart(jsonWriter, isIncludingPrivateData);
|
outputLogUnitStart(jsonWriter, canIncludePrivateData);
|
||||||
}
|
}
|
||||||
outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i),
|
outputLogStatementToLocked(jsonWriter, mLogStatementList.get(i), mValuesList.get(i),
|
||||||
mTimeList.get(i));
|
mTimeList.get(i));
|
||||||
|
@ -145,7 +145,7 @@ import java.util.Map;
|
||||||
}
|
}
|
||||||
if (jsonWriter != null) {
|
if (jsonWriter != null) {
|
||||||
// We must have called logUnitStart earlier, so emit a logUnitStop.
|
// We must have called logUnitStart earlier, so emit a logUnitStop.
|
||||||
outputLogUnitStop(jsonWriter, isIncludingPrivateData);
|
outputLogUnitStop(jsonWriter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
|
@ -171,11 +171,11 @@ import java.util.Map;
|
||||||
private static final String LOG_UNIT_END_KEY = "logUnitEnd";
|
private static final String LOG_UNIT_END_KEY = "logUnitEnd";
|
||||||
|
|
||||||
private void outputLogUnitStart(final JsonWriter jsonWriter,
|
private void outputLogUnitStart(final JsonWriter jsonWriter,
|
||||||
final boolean isIncludingPrivateData) {
|
final boolean canIncludePrivateData) {
|
||||||
try {
|
try {
|
||||||
jsonWriter.beginObject();
|
jsonWriter.beginObject();
|
||||||
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
|
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
|
||||||
if (isIncludingPrivateData) {
|
if (canIncludePrivateData) {
|
||||||
jsonWriter.name(WORD_KEY).value(getWord());
|
jsonWriter.name(WORD_KEY).value(getWord());
|
||||||
}
|
}
|
||||||
jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
|
jsonWriter.name(EVENT_TYPE_KEY).value(LOG_UNIT_BEGIN_KEY);
|
||||||
|
@ -186,8 +186,7 @@ import java.util.Map;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void outputLogUnitStop(final JsonWriter jsonWriter,
|
private void outputLogUnitStop(final JsonWriter jsonWriter) {
|
||||||
final boolean isIncludingPrivateData) {
|
|
||||||
try {
|
try {
|
||||||
jsonWriter.beginObject();
|
jsonWriter.beginObject();
|
||||||
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
|
jsonWriter.name(CURRENT_TIME_KEY).value(System.currentTimeMillis());
|
||||||
|
|
|
@ -22,6 +22,7 @@ import com.android.inputmethod.latin.Dictionary;
|
||||||
import com.android.inputmethod.latin.Suggest;
|
import com.android.inputmethod.latin.Suggest;
|
||||||
import com.android.inputmethod.latin.define.ProductionFlag;
|
import com.android.inputmethod.latin.define.ProductionFlag;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
@ -56,19 +57,24 @@ import java.util.Random;
|
||||||
* If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
|
* If the user closes a session, then the entire LogBuffer is flushed, publishing any embedded
|
||||||
* n-gram containing dictionary words.
|
* n-gram containing dictionary words.
|
||||||
*/
|
*/
|
||||||
public class MainLogBuffer extends FixedLogBuffer {
|
public abstract class MainLogBuffer extends FixedLogBuffer {
|
||||||
private static final String TAG = MainLogBuffer.class.getSimpleName();
|
private static final String TAG = MainLogBuffer.class.getSimpleName();
|
||||||
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
|
|
||||||
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
|
// The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams.
|
||||||
public static final int N_GRAM_SIZE = 2;
|
public static final int N_GRAM_SIZE = 2;
|
||||||
// The number of words between n-grams to omit from the log. If debugging, record 50% of all
|
|
||||||
// words. Otherwise, only record 10%.
|
|
||||||
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
|
|
||||||
ProductionFlag.IS_EXPERIMENTAL_DEBUG ? 2 : 18;
|
|
||||||
|
|
||||||
private final ResearchLog mResearchLog;
|
// Whether all words should be recorded, leaving no unsampled words between bigrams. Useful for
|
||||||
|
// testing.
|
||||||
|
/* package for test */ static final boolean IS_LOGGING_EVERYTHING = false
|
||||||
|
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
|
|
||||||
|
// The number of words between n-grams to omit from the log.
|
||||||
|
private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES =
|
||||||
|
IS_LOGGING_EVERYTHING ? 0 : (DEBUG ? 2 : 18);
|
||||||
|
|
||||||
private Suggest mSuggest;
|
private Suggest mSuggest;
|
||||||
|
private boolean mIsStopping = false;
|
||||||
|
|
||||||
/* package for test */ int mNumWordsBetweenNGrams;
|
/* package for test */ int mNumWordsBetweenNGrams;
|
||||||
|
|
||||||
|
@ -76,9 +82,8 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
// after a sample is taken.
|
// after a sample is taken.
|
||||||
/* package for test */ int mNumWordsUntilSafeToSample;
|
/* package for test */ int mNumWordsUntilSafeToSample;
|
||||||
|
|
||||||
public MainLogBuffer(final ResearchLog researchLog) {
|
public MainLogBuffer() {
|
||||||
super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
|
super(N_GRAM_SIZE + DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES);
|
||||||
mResearchLog = researchLog;
|
|
||||||
mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
|
mNumWordsBetweenNGrams = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES;
|
||||||
final Random random = new Random();
|
final Random random = new Random();
|
||||||
mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
|
mNumWordsUntilSafeToSample = DEBUG ? 0 : random.nextInt(mNumWordsBetweenNGrams + 1);
|
||||||
|
@ -92,6 +97,10 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
|
mNumWordsUntilSafeToSample = mNumWordsBetweenNGrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setIsStopping() {
|
||||||
|
mIsStopping = true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines whether uploading the n words at the front of the MainLogBuffer will not violate
|
* Determines whether uploading the n words at the front of the MainLogBuffer will not violate
|
||||||
* user privacy.
|
* user privacy.
|
||||||
|
@ -103,16 +112,36 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
* the screen orientation and other characteristics about the device can be uploaded without
|
* the screen orientation and other characteristics about the device can be uploaded without
|
||||||
* revealing much about the user.
|
* revealing much about the user.
|
||||||
*/
|
*/
|
||||||
public boolean isNGramSafe() {
|
private boolean isSafeNGram(final ArrayList<LogUnit> logUnits, final int minNGramSize) {
|
||||||
|
// Bypass privacy checks when debugging.
|
||||||
|
if (IS_LOGGING_EVERYTHING) {
|
||||||
|
if (mIsStopping) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// Only check that it is the right length. If not, wait for later words to make
|
||||||
|
// complete n-grams.
|
||||||
|
int numWordsInLogUnitList = 0;
|
||||||
|
final int length = logUnits.size();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
|
final String word = logUnit.getWord();
|
||||||
|
if (word != null) {
|
||||||
|
numWordsInLogUnitList++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return numWordsInLogUnitList >= minNGramSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check that we are not sampling too frequently. Having sampled recently might disclose
|
// Check that we are not sampling too frequently. Having sampled recently might disclose
|
||||||
// too much of the user's intended meaning.
|
// too much of the user's intended meaning.
|
||||||
if (mNumWordsUntilSafeToSample > 0) {
|
if (mNumWordsUntilSafeToSample > 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mSuggest == null || !mSuggest.hasMainDictionary()) {
|
if (mSuggest == null || !mSuggest.hasMainDictionary()) {
|
||||||
// Main dictionary is unavailable. Since we cannot check it, we cannot tell if a word
|
// Main dictionary is unavailable. Since we cannot check it, we cannot tell if a
|
||||||
// is out-of-vocabulary or not. Therefore, we must judge the entire buffer contents to
|
// word is out-of-vocabulary or not. Therefore, we must judge the entire buffer
|
||||||
// potentially pose a privacy risk.
|
// contents to potentially pose a privacy risk.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Reload the dictionary in case it has changed (e.g., because the user has changed
|
// Reload the dictionary in case it has changed (e.g., because the user has changed
|
||||||
|
@ -121,12 +150,12 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
if (dictionary == null) {
|
if (dictionary == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Check each word in the buffer. If any word poses a privacy threat, we cannot upload the
|
|
||||||
// complete buffer contents in detail.
|
// Check each word in the buffer. If any word poses a privacy threat, we cannot upload
|
||||||
final LinkedList<LogUnit> logUnits = getLogUnits();
|
// the complete buffer contents in detail.
|
||||||
|
int numWordsInLogUnitList = 0;
|
||||||
final int length = logUnits.size();
|
final int length = logUnits.size();
|
||||||
int wordsNeeded = N_GRAM_SIZE;
|
for (int i = 0; i < length; i++) {
|
||||||
for (int i = 0; i < length && wordsNeeded > 0; i++) {
|
|
||||||
final LogUnit logUnit = logUnits.get(i);
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
final String word = logUnit.getWord();
|
final String word = logUnit.getWord();
|
||||||
if (word == null) {
|
if (word == null) {
|
||||||
|
@ -135,6 +164,7 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
numWordsInLogUnitList++;
|
||||||
// Words not in the dictionary are a privacy threat.
|
// Words not in the dictionary are a privacy threat.
|
||||||
if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
|
if (ResearchLogger.hasLetters(word) && !(dictionary.isValidWord(word))) {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
|
@ -145,38 +175,59 @@ public class MainLogBuffer extends FixedLogBuffer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All checks have passed; this buffer's content can be safely uploaded.
|
|
||||||
return true;
|
// Finally, only return true if the minNGramSize is met.
|
||||||
|
return numWordsInLogUnitList >= minNGramSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isNGramComplete() {
|
public void shiftAndPublishAll() {
|
||||||
final LinkedList<LogUnit> logUnits = getLogUnits();
|
final LinkedList<LogUnit> logUnits = getLogUnits();
|
||||||
final int length = logUnits.size();
|
while (!logUnits.isEmpty()) {
|
||||||
int wordsNeeded = N_GRAM_SIZE;
|
publishLogUnitsAtFrontOfBuffer();
|
||||||
for (int i = 0; i < length && wordsNeeded > 0; i++) {
|
|
||||||
final LogUnit logUnit = logUnits.get(i);
|
|
||||||
final String word = logUnit.getWord();
|
|
||||||
if (word != null) {
|
|
||||||
wordsNeeded--;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return wordsNeeded == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onShiftOut(final LogUnit logUnit) {
|
protected final void onBufferFull() {
|
||||||
if (mResearchLog != null) {
|
publishLogUnitsAtFrontOfBuffer();
|
||||||
mResearchLog.publish(logUnit,
|
|
||||||
ResearchLogger.IS_LOGGING_EVERYTHING /* isIncludingPrivateData */);
|
|
||||||
}
|
}
|
||||||
if (logUnit.hasWord()) {
|
|
||||||
if (mNumWordsUntilSafeToSample > 0) {
|
protected final void publishLogUnitsAtFrontOfBuffer() {
|
||||||
mNumWordsUntilSafeToSample--;
|
ArrayList<LogUnit> logUnits = peekAtFirstNWords(N_GRAM_SIZE);
|
||||||
|
if (isSafeNGram(logUnits, N_GRAM_SIZE)) {
|
||||||
|
// Good n-gram at the front of the buffer. Publish it, disclosing details.
|
||||||
|
publish(logUnits, true /* canIncludePrivateData */);
|
||||||
|
shiftOutWords(N_GRAM_SIZE);
|
||||||
|
resetWordCounter();
|
||||||
|
} else {
|
||||||
|
// No good n-gram at front, and buffer is full. Shift out the first word (or if there
|
||||||
|
// is none, the existing logUnits).
|
||||||
|
logUnits = peekAtFirstNWords(1);
|
||||||
|
publish(logUnits, false /* canIncludePrivateData */);
|
||||||
|
shiftOutWords(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when a list of logUnits should be published.
|
||||||
|
*
|
||||||
|
* It is the subclass's responsibility to implement the publication.
|
||||||
|
*
|
||||||
|
* @param logUnits The list of logUnits to be published.
|
||||||
|
* @param canIncludePrivateData Whether the private data in the logUnits can be included in
|
||||||
|
* publication.
|
||||||
|
*/
|
||||||
|
protected abstract void publish(final ArrayList<LogUnit> logUnits,
|
||||||
|
final boolean canIncludePrivateData);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void shiftOutWords(int numWords) {
|
||||||
|
int oldNumActualWords = getNumActualWords();
|
||||||
|
super.shiftOutWords(numWords);
|
||||||
|
int numWordsShifted = oldNumActualWords - getNumActualWords();
|
||||||
|
mNumWordsUntilSafeToSample -= numWordsShifted;
|
||||||
|
if (DEBUG) {
|
||||||
Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
|
Log.d(TAG, "wordsUntilSafeToSample now at " + mNumWordsUntilSafeToSample);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (DEBUG) {
|
|
||||||
Log.d(TAG, "shiftedOut " + (logUnit.hasWord() ? logUnit.getWord() : ""));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -185,12 +185,12 @@ public class ResearchLog {
|
||||||
mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS);
|
mFlushFuture = mExecutor.schedule(mFlushCallable, FLUSH_DELAY_IN_MS, TimeUnit.MILLISECONDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void publish(final LogUnit logUnit, final boolean isIncludingPrivateData) {
|
public synchronized void publish(final LogUnit logUnit, final boolean canIncludePrivateData) {
|
||||||
try {
|
try {
|
||||||
mExecutor.submit(new Callable<Object>() {
|
mExecutor.submit(new Callable<Object>() {
|
||||||
@Override
|
@Override
|
||||||
public Object call() throws Exception {
|
public Object call() throws Exception {
|
||||||
logUnit.publishTo(ResearchLog.this, isIncludingPrivateData);
|
logUnit.publishTo(ResearchLog.this, canIncludePrivateData);
|
||||||
scheduleFlush();
|
scheduleFlush();
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,9 @@ import com.android.inputmethod.latin.define.ProductionFlag;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
|
@ -84,9 +86,6 @@ import java.util.UUID;
|
||||||
public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChangeListener {
|
public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChangeListener {
|
||||||
private static final String TAG = ResearchLogger.class.getSimpleName();
|
private static final String TAG = ResearchLogger.class.getSimpleName();
|
||||||
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
private static final boolean DEBUG = false && ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
||||||
// Whether all n-grams should be logged. true will disclose private info.
|
|
||||||
public static final boolean IS_LOGGING_EVERYTHING = false
|
|
||||||
&& ProductionFlag.IS_EXPERIMENTAL_DEBUG;
|
|
||||||
// Whether the TextView contents are logged at the end of the session. true will disclose
|
// Whether the TextView contents are logged at the end of the session. true will disclose
|
||||||
// private info.
|
// private info.
|
||||||
private static final boolean LOG_FULL_TEXTVIEW_CONTENTS = false
|
private static final boolean LOG_FULL_TEXTVIEW_CONTENTS = false
|
||||||
|
@ -105,7 +104,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
private static final boolean IS_SHOWING_INDICATOR = true;
|
private static final boolean IS_SHOWING_INDICATOR = true;
|
||||||
// Change the default indicator to something very visible. Currently two red vertical bars on
|
// Change the default indicator to something very visible. Currently two red vertical bars on
|
||||||
// either side of the keyboard.
|
// either side of the keyboard.
|
||||||
private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false || IS_LOGGING_EVERYTHING;
|
private static final boolean IS_SHOWING_INDICATOR_CLEARLY = false ||
|
||||||
|
(MainLogBuffer.IS_LOGGING_EVERYTHING && ProductionFlag.IS_EXPERIMENTAL_DEBUG);
|
||||||
// FEEDBACK_WORD_BUFFER_SIZE should add 1 because it must also hold the feedback LogUnit itself.
|
// FEEDBACK_WORD_BUFFER_SIZE should add 1 because it must also hold the feedback LogUnit itself.
|
||||||
public static final int FEEDBACK_WORD_BUFFER_SIZE = (Integer.MAX_VALUE - 1) + 1;
|
public static final int FEEDBACK_WORD_BUFFER_SIZE = (Integer.MAX_VALUE - 1) + 1;
|
||||||
|
|
||||||
|
@ -387,14 +387,40 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
}
|
}
|
||||||
if (mMainLogBuffer == null) {
|
if (mMainLogBuffer == null) {
|
||||||
mMainResearchLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
|
mMainResearchLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
|
||||||
mMainLogBuffer = new MainLogBuffer(mMainResearchLog);
|
mMainLogBuffer = new MainLogBuffer() {
|
||||||
|
@Override
|
||||||
|
protected void publish(final ArrayList<LogUnit> logUnits,
|
||||||
|
boolean canIncludePrivateData) {
|
||||||
|
canIncludePrivateData |= MainLogBuffer.IS_LOGGING_EVERYTHING;
|
||||||
|
final int length = logUnits.size();
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
final LogUnit logUnit = logUnits.get(i);
|
||||||
|
final String word = logUnit.getWord();
|
||||||
|
if (word != null && word.length() > 0 && hasLetters(word)) {
|
||||||
|
Log.d(TAG, "onPublish: " + word + ", hc: "
|
||||||
|
+ logUnit.containsCorrection());
|
||||||
|
final Dictionary dictionary = getDictionary();
|
||||||
|
mStatistics.recordWordEntered(
|
||||||
|
dictionary != null && dictionary.isValidWord(word),
|
||||||
|
logUnit.containsCorrection());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (mMainResearchLog != null) {
|
||||||
|
publishLogUnits(logUnits, mMainResearchLog, canIncludePrivateData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
mMainLogBuffer.setSuggest(mSuggest);
|
mMainLogBuffer.setSuggest(mSuggest);
|
||||||
}
|
}
|
||||||
if (mFeedbackLogBuffer == null) {
|
if (mFeedbackLogBuffer == null) {
|
||||||
|
resetFeedbackLogging();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void resetFeedbackLogging() {
|
||||||
mFeedbackLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
|
mFeedbackLog = new ResearchLog(createLogFile(mFilesDir), mLatinIME);
|
||||||
mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
|
mFeedbackLogBuffer = new FixedLogBuffer(FEEDBACK_WORD_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* package */ void stop() {
|
/* package */ void stop() {
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
|
@ -404,16 +430,11 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
commitCurrentLogUnit();
|
commitCurrentLogUnit();
|
||||||
|
|
||||||
if (mMainLogBuffer != null) {
|
if (mMainLogBuffer != null) {
|
||||||
while (!mMainLogBuffer.isEmpty()) {
|
mMainLogBuffer.shiftAndPublishAll();
|
||||||
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
|
logStatistics();
|
||||||
mMainResearchLog != null) {
|
commitCurrentLogUnit();
|
||||||
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
mMainLogBuffer.setIsStopping();
|
||||||
true /* isIncludingPrivateData */);
|
mMainLogBuffer.shiftAndPublishAll();
|
||||||
mMainLogBuffer.resetWordCounter();
|
|
||||||
} else {
|
|
||||||
mMainLogBuffer.shiftOutThroughFirstWord();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mMainResearchLog.close(null /* callback */);
|
mMainResearchLog.close(null /* callback */);
|
||||||
mMainLogBuffer = null;
|
mMainLogBuffer = null;
|
||||||
}
|
}
|
||||||
|
@ -731,13 +752,6 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
}
|
}
|
||||||
if (!mCurrentLogUnit.isEmpty()) {
|
if (!mCurrentLogUnit.isEmpty()) {
|
||||||
if (mMainLogBuffer != null) {
|
if (mMainLogBuffer != null) {
|
||||||
if ((mMainLogBuffer.isNGramSafe() || IS_LOGGING_EVERYTHING) &&
|
|
||||||
mMainLogBuffer.isNGramComplete() &&
|
|
||||||
mMainResearchLog != null) {
|
|
||||||
publishLogBuffer(mMainLogBuffer, mMainResearchLog,
|
|
||||||
true /* isIncludingPrivateData */);
|
|
||||||
mMainLogBuffer.resetWordCounter();
|
|
||||||
}
|
|
||||||
mMainLogBuffer.shiftIn(mCurrentLogUnit);
|
mMainLogBuffer.shiftIn(mCurrentLogUnit);
|
||||||
}
|
}
|
||||||
if (mFeedbackLogBuffer != null) {
|
if (mFeedbackLogBuffer != null) {
|
||||||
|
@ -798,34 +812,40 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* package for test */ void publishLogBuffer(final LogBuffer logBuffer,
|
||||||
|
final ResearchLog researchLog, final boolean isIncludingPrivateData) {
|
||||||
|
publishLogUnits(logBuffer.getLogUnits(), researchLog, isIncludingPrivateData);
|
||||||
|
}
|
||||||
|
|
||||||
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_OPENING =
|
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_OPENING =
|
||||||
new LogStatement("logSegmentStart", false, false, "isIncludingPrivateData");
|
new LogStatement("logSegmentStart", false, false, "isIncludingPrivateData");
|
||||||
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_CLOSING =
|
private static final LogStatement LOGSTATEMENT_LOG_SEGMENT_CLOSING =
|
||||||
new LogStatement("logSegmentEnd", false, false);
|
new LogStatement("logSegmentEnd", false, false);
|
||||||
/* package for test */ void publishLogBuffer(final LogBuffer logBuffer,
|
/* package for test */ void publishLogUnits(final List<LogUnit> logUnits,
|
||||||
final ResearchLog researchLog, final boolean isIncludingPrivateData) {
|
final ResearchLog researchLog, final boolean canIncludePrivateData) {
|
||||||
final LogUnit openingLogUnit = new LogUnit();
|
final LogUnit openingLogUnit = new LogUnit();
|
||||||
if (logBuffer.isEmpty()) return;
|
if (logUnits.isEmpty()) return;
|
||||||
openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING, SystemClock.uptimeMillis(),
|
// LogUnits not containing private data, such as contextual data for the log, do not require
|
||||||
isIncludingPrivateData);
|
// logSegment boundary statements.
|
||||||
|
if (canIncludePrivateData) {
|
||||||
|
openingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_OPENING,
|
||||||
|
SystemClock.uptimeMillis(), canIncludePrivateData);
|
||||||
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
|
researchLog.publish(openingLogUnit, true /* isIncludingPrivateData */);
|
||||||
LogUnit logUnit;
|
}
|
||||||
int numWordsToPublish = MainLogBuffer.N_GRAM_SIZE;
|
for (LogUnit logUnit : logUnits) {
|
||||||
while ((logUnit = logBuffer.shiftOut()) != null && numWordsToPublish > 0) {
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
|
Log.d(TAG, "publishLogBuffer: " + (logUnit.hasWord() ? logUnit.getWord()
|
||||||
: "<wordless>"));
|
: "<wordless>") + ", correction?: " + logUnit.containsCorrection());
|
||||||
}
|
|
||||||
researchLog.publish(logUnit, isIncludingPrivateData);
|
|
||||||
if (logUnit.getWord() != null) {
|
|
||||||
numWordsToPublish--;
|
|
||||||
}
|
}
|
||||||
|
researchLog.publish(logUnit, canIncludePrivateData);
|
||||||
}
|
}
|
||||||
|
if (canIncludePrivateData) {
|
||||||
final LogUnit closingLogUnit = new LogUnit();
|
final LogUnit closingLogUnit = new LogUnit();
|
||||||
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
|
closingLogUnit.addLogStatement(LOGSTATEMENT_LOG_SEGMENT_CLOSING,
|
||||||
SystemClock.uptimeMillis());
|
SystemClock.uptimeMillis());
|
||||||
researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
|
researchLog.publish(closingLogUnit, true /* isIncludingPrivateData */);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static boolean hasLetters(final String word) {
|
public static boolean hasLetters(final String word) {
|
||||||
final int length = word.length();
|
final int length = word.length();
|
||||||
|
@ -849,12 +869,8 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
if (word == null) {
|
if (word == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
final Dictionary dictionary = getDictionary();
|
|
||||||
if (word.length() > 0 && hasLetters(word)) {
|
if (word.length() > 0 && hasLetters(word)) {
|
||||||
mCurrentLogUnit.setWord(word);
|
mCurrentLogUnit.setWord(word);
|
||||||
final boolean isDictionaryWord = dictionary != null
|
|
||||||
&& dictionary.isValidWord(word);
|
|
||||||
mStatistics.recordWordEntered(isDictionaryWord, mCurrentLogUnit.containsCorrection());
|
|
||||||
}
|
}
|
||||||
final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
|
final LogUnit newLogUnit = mCurrentLogUnit.splitByTime(maxTime);
|
||||||
enqueueCommitText(word, isBatchMode);
|
enqueueCommitText(word, isBatchMode);
|
||||||
|
@ -967,7 +983,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
Integer.toHexString(editorInfo.inputType),
|
Integer.toHexString(editorInfo.inputType),
|
||||||
Integer.toHexString(editorInfo.imeOptions), editorInfo.fieldId,
|
Integer.toHexString(editorInfo.imeOptions), editorInfo.fieldId,
|
||||||
Build.DISPLAY, Build.MODEL, prefs, versionCode, versionName,
|
Build.DISPLAY, Build.MODEL, prefs, versionCode, versionName,
|
||||||
OUTPUT_FORMAT_VERSION, IS_LOGGING_EVERYTHING,
|
OUTPUT_FORMAT_VERSION, MainLogBuffer.IS_LOGGING_EVERYTHING,
|
||||||
ProductionFlag.IS_EXPERIMENTAL_DEBUG);
|
ProductionFlag.IS_EXPERIMENTAL_DEBUG);
|
||||||
} catch (NameNotFoundException e) {
|
} catch (NameNotFoundException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
|
@ -976,7 +992,6 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
}
|
}
|
||||||
|
|
||||||
public void latinIME_onFinishInputViewInternal() {
|
public void latinIME_onFinishInputViewInternal() {
|
||||||
logStatistics();
|
|
||||||
stop();
|
stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1524,6 +1539,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
||||||
public static void richInputConnection_commitText(final String committedWord,
|
public static void richInputConnection_commitText(final String committedWord,
|
||||||
final int newCursorPosition, final boolean isBatchMode) {
|
final int newCursorPosition, final boolean isBatchMode) {
|
||||||
final ResearchLogger researchLogger = getInstance();
|
final ResearchLogger researchLogger = getInstance();
|
||||||
|
// Only include opening and closing logSegments if private data is included
|
||||||
final String scrubbedWord = scrubDigitsFromString(committedWord);
|
final String scrubbedWord = scrubDigitsFromString(committedWord);
|
||||||
if (!researchLogger.isExpectingCommitText) {
|
if (!researchLogger.isExpectingCommitText) {
|
||||||
researchLogger.enqueueEvent(LOGSTATEMENT_RICHINPUTCONNECTIONCOMMITTEXT,
|
researchLogger.enqueueEvent(LOGSTATEMENT_RICHINPUTCONNECTIONCOMMITTEXT,
|
||||||
|
|
Loading…
Reference in a new issue