include text context in researchLogger

when logging LatinIME.onUpdateSelection(), now include the current word and
preceding word.  no escaping of the word is performed; this is temporary
until the output format is cleaned up.

also fix EditingUtils.getWordRangeAtCursor to support supplementary
Unicode characters (code points encoded as UTF-16 surrogate pairs).

Bug: 6188932
Change-Id: If4612a2627537d5d8bb2f9585a3ad1b4e56c2e26
This commit is contained in:
Kurt Partridge 2012-05-09 16:04:26 -07:00
parent 2b49579961
commit aec44d50a7
5 changed files with 215 additions and 78 deletions

View file

@ -54,7 +54,7 @@ public class EditingUtils {
*/
public static String getWordAtCursor(InputConnection connection, String separators) {
// getWordRangeAtCursor returns null if the connection is null
Range r = getWordRangeAtCursor(connection, separators);
Range r = getWordRangeAtCursor(connection, separators, 0);
return (r == null) ? null : r.mWord;
}
@ -84,7 +84,17 @@ public class EditingUtils {
}
}
private static Range getWordRangeAtCursor(InputConnection connection, String sep) {
/**
* Returns the text surrounding the cursor.
*
* @param connection the InputConnection to the TextView
* @param sep a string of characters that split words.
* @param additionalPrecedingWordsCount the number of words before the current word that should
* be included in the returned range
* @return a range containing the text surrounding the cursor
*/
public static Range getWordRangeAtCursor(InputConnection connection, String sep,
int additionalPrecedingWordsCount) {
if (connection == null || sep == null) {
return null;
}
@ -94,14 +104,40 @@ public class EditingUtils {
return null;
}
// Find first word separator before the cursor
// Going backward, alternate skipping non-separators and separators until enough words
// have been read.
int start = before.length();
while (start > 0 && !isWhitespace(before.charAt(start - 1), sep)) start--;
boolean isStoppingAtWhitespace = true; // toggles to indicate what to stop at
while (true) { // see comments below for why this is guaranteed to halt
while (start > 0) {
final int codePoint = Character.codePointBefore(before, start);
if (isStoppingAtWhitespace == isSeparator(codePoint, sep)) {
break; // inner loop
}
--start;
if (Character.isSupplementaryCodePoint(codePoint)) {
--start;
}
}
// isStoppingAtWhitespace is true every other time through the loop,
// so additionalPrecedingWordsCount is guaranteed to become < 0, which
// guarantees outer loop termination
if (isStoppingAtWhitespace && (--additionalPrecedingWordsCount < 0)) {
break; // outer loop
}
isStoppingAtWhitespace = !isStoppingAtWhitespace;
}
// Find last word separator after the cursor
int end = -1;
while (++end < after.length() && !isWhitespace(after.charAt(end), sep)) {
// Nothing to do here.
while (++end < after.length()) {
final int codePoint = Character.codePointAt(after, end);
if (isSeparator(codePoint, sep)) {
break;
}
if (Character.isSupplementaryCodePoint(codePoint)) {
++end;
}
}
int cursor = getCursorPosition(connection);
@ -114,8 +150,8 @@ public class EditingUtils {
return null;
}
private static boolean isWhitespace(int code, String whitespace) {
return whitespace.contains(String.valueOf((char) code));
private static boolean isSeparator(int code, String sep) {
return sep.indexOf(code) != -1;
}
private static final Pattern spaceRegex = Pattern.compile("\\s+");

View file

@ -755,7 +755,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
ResearchLogger.latinIME_onUpdateSelection(mLastSelectionStart, mLastSelectionEnd,
oldSelStart, oldSelEnd, newSelStart, newSelEnd, composingSpanStart,
composingSpanEnd, mExpectingUpdateSelection,
expectingUpdateSelectionFromLogger);
expectingUpdateSelectionFromLogger, getCurrentInputConnection());
if (expectingUpdateSelectionFromLogger) {
return;
}

View file

@ -36,6 +36,7 @@ import com.android.inputmethod.keyboard.Key;
import com.android.inputmethod.keyboard.KeyDetector;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.internal.KeyboardState;
import com.android.inputmethod.latin.EditingUtils.Range;
import com.android.inputmethod.latin.define.ProductionFlag;
import java.io.BufferedWriter;
@ -64,6 +65,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
private static final String PREF_USABILITY_STUDY_MODE = "usability_study_mode";
private static final String PREF_RESEARCH_LOGGER_UUID_STRING = "pref_research_logger_uuid";
private static final boolean DEBUG = false;
private static final String WHITESPACE_SEPARATORS = " \t\n\r";
private static final ResearchLogger sInstance = new ResearchLogger(new LogFileManager());
private static final int MAX_INPUTVIEW_LENGTH_TO_CAPTURE = 8192; // must be >=1
@ -558,9 +560,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
logUnstructured("LatinIME_onWindowHidden", "<no text>");
} else {
if (charSequence.length() > MAX_INPUTVIEW_LENGTH_TO_CAPTURE) {
// do not cut in the middle of a supplementary character
int length = MAX_INPUTVIEW_LENGTH_TO_CAPTURE;
if (!Character.isLetter(charSequence.charAt(length))) {
// do not cut in the middle of a supplementary character
final char c = charSequence.charAt(length-1);
if (Character.isHighSurrogate(c)) {
length--;
}
final CharSequence truncatedCharSequence = charSequence.subSequence(0,
@ -614,7 +617,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
final int lastSelectionEnd, final int oldSelStart, final int oldSelEnd,
final int newSelStart, final int newSelEnd, final int composingSpanStart,
final int composingSpanEnd, final boolean expectingUpdateSelection,
final boolean expectingUpdateSelectionFromLogger) {
final boolean expectingUpdateSelectionFromLogger, final InputConnection connection) {
if (UnsLogGroup.LATINIME_ONUPDATESELECTION_ENABLED) {
final String s = "onUpdateSelection: oss=" + oldSelStart
+ ", ose=" + oldSelEnd
@ -625,7 +628,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
+ ", cs=" + composingSpanStart
+ ", ce=" + composingSpanEnd
+ ", eus=" + expectingUpdateSelection
+ ", eusfl=" + expectingUpdateSelectionFromLogger;
+ ", eusfl=" + expectingUpdateSelectionFromLogger
+ ", context=\"" + EditingUtils.getWordRangeAtCursor(connection,
WHITESPACE_SEPARATORS, 1).mWord + "\"";
logUnstructured("LatinIME_onUpdateSelection", s);
}
}

View file

@ -0,0 +1,161 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.view.inputmethod.ExtractedText;
import android.view.inputmethod.ExtractedTextRequest;
import android.view.inputmethod.InputConnection;
import android.view.inputmethod.InputConnectionWrapper;
import com.android.inputmethod.latin.EditingUtils.Range;
/**
 * Unit tests for the word-extraction helpers in {@link EditingUtils}:
 * {@code getPreviousWord}, {@code getThisWord} and {@code getWordRangeAtCursor}.
 */
public class EditingUtilsTests extends AndroidTestCase {
    // The following is meant to be a reasonable default for
    // the "word_separators" resource.
    private static final String sSeparators = ".,:;!?-";

    /**
     * Stub connection that wraps no target and answers the three queries overridden
     * below with the canned values given at construction time. The request arguments
     * ({@code n}, {@code flags}, {@code request}) are ignored.
     */
    private static class MockConnection extends InputConnectionWrapper {
        private final String mTextBefore;
        private final String mTextAfter;
        private final ExtractedText mExtractedText;

        /**
         * @param textBefore text reported as preceding the cursor
         * @param textAfter text reported as following the cursor
         * @param extractedText value returned from {@link #getExtractedText}
         */
        public MockConnection(String textBefore, String textAfter, ExtractedText extractedText) {
            super(null, false);
            mTextBefore = textBefore;
            mTextAfter = textAfter;
            mExtractedText = extractedText;
        }

        /** Returns the canned "before" text regardless of the requested length. */
        @Override
        public CharSequence getTextBeforeCursor(int n, int flags) {
            return mTextBefore;
        }

        /** Returns the canned "after" text regardless of the requested length. */
        @Override
        public CharSequence getTextAfterCursor(int n, int flags) {
            return mTextAfter;
        }

        /** Returns the canned extracted text regardless of the request. */
        @Override
        public ExtractedText getExtractedText(ExtractedTextRequest request, int flags) {
            return mExtractedText;
        }
    }

    /************************** Tests ************************/

    /**
     * Test for getting previous word (for bigram suggestions)
     */
    public void testGetPreviousWord() {
        // If one of the following cases breaks, the bigram suggestions won't work.
        assertEquals("abc", EditingUtils.getPreviousWord("abc def", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators));

        // The following tests reflect the current behavior of the function
        // EditingUtils#getPreviousWord.
        // TODO: At this time the code never takes such a path, so it should be
        // safe to change the behavior of this function if needed - especially
        // since it does not seem very logical. These tests are just there to
        // catch any unintentional changes in the behavior of the
        // EditingUtils#getPreviousWord method.
        assertEquals("abc", EditingUtils.getPreviousWord("abc def ", sSeparators));
        assertEquals("abc", EditingUtils.getPreviousWord("abc def.", sSeparators));
        assertEquals("def", EditingUtils.getPreviousWord("abc def .", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc ", sSeparators));
    }

    /**
     * Test for getting the word before the cursor (for bigram)
     */
    public void testGetThisWord() {
        assertEquals("def", EditingUtils.getThisWord("abc def", sSeparators));
        assertEquals("def", EditingUtils.getThisWord("abc def ", sSeparators));
        assertNull(EditingUtils.getThisWord("abc def.", sSeparators));
        assertNull(EditingUtils.getThisWord("abc def .", sSeparators));
    }

    /**
     * Test logic in getting the word range at the cursor.
     */
    public void testGetWordRangeAtCursor() {
        // The same ExtractedText instance is shared by every mock connection below.
        // NOTE(review): presumably read by EditingUtils to locate the cursor - confirm.
        final ExtractedText et = new ExtractedText();
        et.startOffset = 0;
        et.selectionStart = 7;

        InputConnection mockConnection;
        Range r;

        // basic case
        mockConnection = new MockConnection("word wo", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 0);
        assertEquals("word", r.mWord);

        // more than one word
        r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 1);
        assertEquals("word word", r.mWord);

        // tab character instead of space
        mockConnection = new MockConnection("one\tword\two", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1);
        assertEquals("word\tword", r.mWord);

        // only one word doesn't go too far
        // NOTE(review): same inputs and expectation as the case directly above.
        mockConnection = new MockConnection("one\tword\two", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1);
        assertEquals("word\tword", r.mWord);

        // tab or space
        mockConnection = new MockConnection("one word\two", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 1);
        assertEquals("word\tword", r.mWord);

        // tab or space multiword
        mockConnection = new MockConnection("one word\two", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 2);
        assertEquals("one word\tword", r.mWord);

        // splitting on supplementary character (U+2008A, a surrogate pair in UTF-16)
        final String supplementaryChar = "\uD840\uDC8A";
        mockConnection = new MockConnection("one word" + supplementaryChar + "wo", "rd", et);
        r = EditingUtils.getWordRangeAtCursor(mockConnection, supplementaryChar, 0);
        assertEquals("word", r.mWord);
    }
}

View file

@ -1,65 +0,0 @@
/*
* Copyright (C) 2010,2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
/**
 * Unit tests for the string-based word helpers in {@link EditingUtils}:
 * {@code getPreviousWord} and {@code getThisWord}.
 */
public class UtilsTests extends AndroidTestCase {
    // The following is meant to be a reasonable default for
    // the "word_separators" resource.
    private static final String sSeparators = ".,:;!?-";

    /************************** Tests ************************/

    /**
     * Test for getting previous word (for bigram suggestions)
     */
    public void testGetPreviousWord() {
        // If one of the following cases breaks, the bigram suggestions won't work.
        assertEquals("abc", EditingUtils.getPreviousWord("abc def", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators));

        // The following tests reflect the current behavior of the function
        // EditingUtils#getPreviousWord.
        // TODO: At this time the code never takes such a path, so it should be
        // safe to change the behavior of this function if needed - especially
        // since it does not seem very logical. These tests are just there to
        // catch any unintentional changes in the behavior of the
        // EditingUtils#getPreviousWord method.
        assertEquals("abc", EditingUtils.getPreviousWord("abc def ", sSeparators));
        assertEquals("abc", EditingUtils.getPreviousWord("abc def.", sSeparators));
        assertEquals("def", EditingUtils.getPreviousWord("abc def .", sSeparators));
        assertNull(EditingUtils.getPreviousWord("abc ", sSeparators));
    }

    /**
     * Test for getting the word before the cursor (for bigram)
     */
    public void testGetThisWord() {
        assertEquals("def", EditingUtils.getThisWord("abc def", sSeparators));
        assertEquals("def", EditingUtils.getThisWord("abc def ", sSeparators));
        assertNull(EditingUtils.getThisWord("abc def.", sSeparators));
        assertNull(EditingUtils.getThisWord("abc def .", sSeparators));
    }
}