From 3e8df13cd761e376a9a8cb324f6ea9e5db0af9fc Mon Sep 17 00:00:00 2001 From: Kurt Partridge Date: Tue, 14 May 2013 06:23:52 -0700 Subject: [PATCH] Add clarifying comment This is a follow-up to I7c01c3dd3ac33d7e96c00836256bae9c14b124ed Change-Id: I9e3f8968c4bbf9525d5dfe101f71373c42f88361 --- .../inputmethod/research/MainLogBuffer.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/java/src/com/android/inputmethod/research/MainLogBuffer.java b/java/src/com/android/inputmethod/research/MainLogBuffer.java index 9aa349906..7e8f16697 100644 --- a/java/src/com/android/inputmethod/research/MainLogBuffer.java +++ b/java/src/com/android/inputmethod/research/MainLogBuffer.java @@ -196,6 +196,22 @@ public abstract class MainLogBuffer extends FixedLogBuffer { } } + /** + * If there is a safe n-gram at the front of this log buffer, publish it with all details, and + * remove the LogUnits that constitute it. + * + * An n-gram might not be "safe" if it violates privacy controls. E.g., it might contain + * numbers, an out-of-vocabulary word, or another n-gram may have been published recently. If + * there is no safe n-gram, then the LogUnits up through the first word-containing LogUnit are + * published, but without disclosing any privacy-related details, such as the word the LogUnit + * generated, motion data, etc. + * + * Note that a LogUnit can hold more than one word if the user types without explicit spaces. + * In this case, the words may be grouped together in such a way that pulling an n-gram off the + * front would require splitting a LogUnit. Splitting a LogUnit is not possible, so this case + * is treated just as the unsafe n-gram case. This may cause n-grams to be sampled at slightly + * less than the target frequency. + */ protected final void publishLogUnitsAtFrontOfBuffer() throws IOException { // TODO: Refactor this method to require fewer passes through the LogUnits. Should really // require only one pass.