Include text context in ResearchLogger
When logging LatinIME.onUpdateSelection(), now include the current word and the preceding word. No escaping of the words is performed; this is temporary until the output format is cleaned up. Also fix EditingUtils.getWordRangeAtCursor to support supplementary characters (UTF-16 surrogate pairs). Bug: 6188932 Change-Id: If4612a2627537d5d8bb2f9585a3ad1b4e56c2e26
This commit is contained in:
parent
2b49579961
commit
aec44d50a7
5 changed files with 215 additions and 78 deletions
|
@ -54,7 +54,7 @@ public class EditingUtils {
|
|||
*/
|
||||
public static String getWordAtCursor(InputConnection connection, String separators) {
|
||||
// getWordRangeAtCursor returns null if the connection is null
|
||||
Range r = getWordRangeAtCursor(connection, separators);
|
||||
Range r = getWordRangeAtCursor(connection, separators, 0);
|
||||
return (r == null) ? null : r.mWord;
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,17 @@ public class EditingUtils {
|
|||
}
|
||||
}
|
||||
|
||||
private static Range getWordRangeAtCursor(InputConnection connection, String sep) {
|
||||
/**
|
||||
* Returns the text surrounding the cursor.
|
||||
*
|
||||
* @param connection the InputConnection to the TextView
|
||||
* @param sep a string of characters that split words.
|
||||
* @param additionalPrecedingWordsCount the number of words before the current word that should
|
||||
* be included in the returned range
|
||||
* @return a range containing the text surrounding the cursor
|
||||
*/
|
||||
public static Range getWordRangeAtCursor(InputConnection connection, String sep,
|
||||
int additionalPrecedingWordsCount) {
|
||||
if (connection == null || sep == null) {
|
||||
return null;
|
||||
}
|
||||
|
@ -94,14 +104,40 @@ public class EditingUtils {
|
|||
return null;
|
||||
}
|
||||
|
||||
// Find first word separator before the cursor
|
||||
// Going backward, alternate skipping non-separators and separators until enough words
|
||||
// have been read.
|
||||
int start = before.length();
|
||||
while (start > 0 && !isWhitespace(before.charAt(start - 1), sep)) start--;
|
||||
boolean isStoppingAtWhitespace = true; // toggles to indicate what to stop at
|
||||
while (true) { // see comments below for why this is guaranteed to halt
|
||||
while (start > 0) {
|
||||
final int codePoint = Character.codePointBefore(before, start);
|
||||
if (isStoppingAtWhitespace == isSeparator(codePoint, sep)) {
|
||||
break; // inner loop
|
||||
}
|
||||
--start;
|
||||
if (Character.isSupplementaryCodePoint(codePoint)) {
|
||||
--start;
|
||||
}
|
||||
}
|
||||
// isStoppingAtWhitespace is true every other time through the loop,
|
||||
// so additionalPrecedingWordsCount is guaranteed to become < 0, which
|
||||
// guarantees outer loop termination
|
||||
if (isStoppingAtWhitespace && (--additionalPrecedingWordsCount < 0)) {
|
||||
break; // outer loop
|
||||
}
|
||||
isStoppingAtWhitespace = !isStoppingAtWhitespace;
|
||||
}
|
||||
|
||||
// Find last word separator after the cursor
|
||||
int end = -1;
|
||||
while (++end < after.length() && !isWhitespace(after.charAt(end), sep)) {
|
||||
// Nothing to do here.
|
||||
while (++end < after.length()) {
|
||||
final int codePoint = Character.codePointAt(after, end);
|
||||
if (isSeparator(codePoint, sep)) {
|
||||
break;
|
||||
}
|
||||
if (Character.isSupplementaryCodePoint(codePoint)) {
|
||||
++end;
|
||||
}
|
||||
}
|
||||
|
||||
int cursor = getCursorPosition(connection);
|
||||
|
@ -114,8 +150,8 @@ public class EditingUtils {
|
|||
return null;
|
||||
}
|
||||
|
||||
private static boolean isWhitespace(int code, String whitespace) {
|
||||
return whitespace.contains(String.valueOf((char) code));
|
||||
private static boolean isSeparator(int code, String sep) {
|
||||
return sep.indexOf(code) != -1;
|
||||
}
|
||||
|
||||
private static final Pattern spaceRegex = Pattern.compile("\\s+");
|
||||
|
|
|
@ -755,7 +755,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
ResearchLogger.latinIME_onUpdateSelection(mLastSelectionStart, mLastSelectionEnd,
|
||||
oldSelStart, oldSelEnd, newSelStart, newSelEnd, composingSpanStart,
|
||||
composingSpanEnd, mExpectingUpdateSelection,
|
||||
expectingUpdateSelectionFromLogger);
|
||||
expectingUpdateSelectionFromLogger, getCurrentInputConnection());
|
||||
if (expectingUpdateSelectionFromLogger) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import com.android.inputmethod.keyboard.Key;
|
|||
import com.android.inputmethod.keyboard.KeyDetector;
|
||||
import com.android.inputmethod.keyboard.Keyboard;
|
||||
import com.android.inputmethod.keyboard.internal.KeyboardState;
|
||||
import com.android.inputmethod.latin.EditingUtils.Range;
|
||||
import com.android.inputmethod.latin.define.ProductionFlag;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
|
@ -64,6 +65,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||
private static final String PREF_USABILITY_STUDY_MODE = "usability_study_mode";
|
||||
private static final String PREF_RESEARCH_LOGGER_UUID_STRING = "pref_research_logger_uuid";
|
||||
private static final boolean DEBUG = false;
|
||||
private static final String WHITESPACE_SEPARATORS = " \t\n\r";
|
||||
|
||||
private static final ResearchLogger sInstance = new ResearchLogger(new LogFileManager());
|
||||
private static final int MAX_INPUTVIEW_LENGTH_TO_CAPTURE = 8192; // must be >=1
|
||||
|
@ -558,9 +560,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||
logUnstructured("LatinIME_onWindowHidden", "<no text>");
|
||||
} else {
|
||||
if (charSequence.length() > MAX_INPUTVIEW_LENGTH_TO_CAPTURE) {
|
||||
// do not cut in the middle of a supplementary character
|
||||
int length = MAX_INPUTVIEW_LENGTH_TO_CAPTURE;
|
||||
if (!Character.isLetter(charSequence.charAt(length))) {
|
||||
// do not cut in the middle of a supplementary character
|
||||
final char c = charSequence.charAt(length-1);
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
length--;
|
||||
}
|
||||
final CharSequence truncatedCharSequence = charSequence.subSequence(0,
|
||||
|
@ -614,7 +617,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||
final int lastSelectionEnd, final int oldSelStart, final int oldSelEnd,
|
||||
final int newSelStart, final int newSelEnd, final int composingSpanStart,
|
||||
final int composingSpanEnd, final boolean expectingUpdateSelection,
|
||||
final boolean expectingUpdateSelectionFromLogger) {
|
||||
final boolean expectingUpdateSelectionFromLogger, final InputConnection connection) {
|
||||
if (UnsLogGroup.LATINIME_ONUPDATESELECTION_ENABLED) {
|
||||
final String s = "onUpdateSelection: oss=" + oldSelStart
|
||||
+ ", ose=" + oldSelEnd
|
||||
|
@ -625,7 +628,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang
|
|||
+ ", cs=" + composingSpanStart
|
||||
+ ", ce=" + composingSpanEnd
|
||||
+ ", eus=" + expectingUpdateSelection
|
||||
+ ", eusfl=" + expectingUpdateSelectionFromLogger;
|
||||
+ ", eusfl=" + expectingUpdateSelectionFromLogger
|
||||
+ ", context=\"" + EditingUtils.getWordRangeAtCursor(connection,
|
||||
WHITESPACE_SEPARATORS, 1).mWord + "\"";
|
||||
logUnstructured("LatinIME_onUpdateSelection", s);
|
||||
}
|
||||
}
|
||||
|
|
161
tests/src/com/android/inputmethod/latin/EditingUtilsTests.java
Normal file
161
tests/src/com/android/inputmethod/latin/EditingUtilsTests.java
Normal file
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
import android.view.inputmethod.ExtractedText;
|
||||
import android.view.inputmethod.ExtractedTextRequest;
|
||||
import android.view.inputmethod.InputConnection;
|
||||
import android.view.inputmethod.InputConnectionWrapper;
|
||||
|
||||
import com.android.inputmethod.latin.EditingUtils.Range;
|
||||
|
||||
public class EditingUtilsTests extends AndroidTestCase {
|
||||
|
||||
// The following is meant to be a reasonable default for
|
||||
// the "word_separators" resource.
|
||||
private static final String sSeparators = ".,:;!?-";
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
private class MockConnection extends InputConnectionWrapper {
|
||||
final String mTextBefore;
|
||||
final String mTextAfter;
|
||||
final ExtractedText mExtractedText;
|
||||
|
||||
public MockConnection(String textBefore, String textAfter, ExtractedText extractedText) {
|
||||
super(null, false);
|
||||
mTextBefore = textBefore;
|
||||
mTextAfter = textAfter;
|
||||
mExtractedText = extractedText;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see android.view.inputmethod.InputConnectionWrapper#getTextBeforeCursor(int, int)
|
||||
*/
|
||||
@Override
|
||||
public CharSequence getTextBeforeCursor(int n, int flags) {
|
||||
return mTextBefore;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see android.view.inputmethod.InputConnectionWrapper#getTextAfterCursor(int, int)
|
||||
*/
|
||||
@Override
|
||||
public CharSequence getTextAfterCursor(int n, int flags) {
|
||||
return mTextAfter;
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see android.view.inputmethod.InputConnectionWrapper#getExtractedText(ExtractedTextRequest, int)
|
||||
*/
|
||||
@Override
|
||||
public ExtractedText getExtractedText(ExtractedTextRequest request, int flags) {
|
||||
return mExtractedText;
|
||||
}
|
||||
}
|
||||
|
||||
/************************** Tests ************************/
|
||||
|
||||
/**
|
||||
* Test for getting previous word (for bigram suggestions)
|
||||
*/
|
||||
public void testGetPreviousWord() {
|
||||
// If one of the following cases breaks, the bigram suggestions won't work.
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def", sSeparators), "abc");
|
||||
assertNull(EditingUtils.getPreviousWord("abc", sSeparators));
|
||||
assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators));
|
||||
|
||||
// The following tests reflect the current behavior of the function
|
||||
// EditingUtils#getPreviousWord.
|
||||
// TODO: However at this time, the code does never go
|
||||
// into such a path, so it should be safe to change the behavior of
|
||||
// this function if needed - especially since it does not seem very
|
||||
// logical. These tests are just there to catch any unintentional
|
||||
// changes in the behavior of the EditingUtils#getPreviousWord method.
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def ", sSeparators), "abc");
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def.", sSeparators), "abc");
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def .", sSeparators), "def");
|
||||
assertNull(EditingUtils.getPreviousWord("abc ", sSeparators));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for getting the word before the cursor (for bigram)
|
||||
*/
|
||||
public void testGetThisWord() {
|
||||
assertEquals(EditingUtils.getThisWord("abc def", sSeparators), "def");
|
||||
assertEquals(EditingUtils.getThisWord("abc def ", sSeparators), "def");
|
||||
assertNull(EditingUtils.getThisWord("abc def.", sSeparators));
|
||||
assertNull(EditingUtils.getThisWord("abc def .", sSeparators));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test logic in getting the word range at the cursor.
|
||||
*/
|
||||
public void testGetWordRangeAtCursor() {
|
||||
ExtractedText et = new ExtractedText();
|
||||
InputConnection mockConnection;
|
||||
mockConnection = new MockConnection("word wo", "rd", et);
|
||||
et.startOffset = 0;
|
||||
et.selectionStart = 7;
|
||||
Range r;
|
||||
|
||||
// basic case
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 0);
|
||||
assertEquals("word", r.mWord);
|
||||
r = null;
|
||||
|
||||
// more than one word
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 1);
|
||||
assertEquals("word word", r.mWord);
|
||||
r = null;
|
||||
|
||||
// tab character instead of space
|
||||
mockConnection = new MockConnection("one\tword\two", "rd", et);
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1);
|
||||
assertEquals("word\tword", r.mWord);
|
||||
r = null;
|
||||
|
||||
// only one word doesn't go too far
|
||||
mockConnection = new MockConnection("one\tword\two", "rd", et);
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1);
|
||||
assertEquals("word\tword", r.mWord);
|
||||
r = null;
|
||||
|
||||
// tab or space
|
||||
mockConnection = new MockConnection("one word\two", "rd", et);
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 1);
|
||||
assertEquals("word\tword", r.mWord);
|
||||
r = null;
|
||||
|
||||
// tab or space multiword
|
||||
mockConnection = new MockConnection("one word\two", "rd", et);
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 2);
|
||||
assertEquals("one word\tword", r.mWord);
|
||||
r = null;
|
||||
|
||||
// splitting on supplementary character
|
||||
final String supplementaryChar = "\uD840\uDC8A";
|
||||
mockConnection = new MockConnection("one word" + supplementaryChar + "wo", "rd", et);
|
||||
r = EditingUtils.getWordRangeAtCursor(mockConnection, supplementaryChar, 0);
|
||||
assertEquals("word", r.mWord);
|
||||
r = null;
|
||||
}
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2010,2011 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
|
||||
public class UtilsTests extends AndroidTestCase {
|
||||
|
||||
// The following is meant to be a reasonable default for
|
||||
// the "word_separators" resource.
|
||||
private static final String sSeparators = ".,:;!?-";
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
/************************** Tests ************************/
|
||||
|
||||
/**
|
||||
* Test for getting previous word (for bigram suggestions)
|
||||
*/
|
||||
public void testGetPreviousWord() {
|
||||
// If one of the following cases breaks, the bigram suggestions won't work.
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def", sSeparators), "abc");
|
||||
assertNull(EditingUtils.getPreviousWord("abc", sSeparators));
|
||||
assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators));
|
||||
|
||||
// The following tests reflect the current behavior of the function
|
||||
// EditingUtils#getPreviousWord.
|
||||
// TODO: However at this time, the code does never go
|
||||
// into such a path, so it should be safe to change the behavior of
|
||||
// this function if needed - especially since it does not seem very
|
||||
// logical. These tests are just there to catch any unintentional
|
||||
// changes in the behavior of the EditingUtils#getPreviousWord method.
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def ", sSeparators), "abc");
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def.", sSeparators), "abc");
|
||||
assertEquals(EditingUtils.getPreviousWord("abc def .", sSeparators), "def");
|
||||
assertNull(EditingUtils.getPreviousWord("abc ", sSeparators));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for getting the word before the cursor (for bigram)
|
||||
*/
|
||||
public void testGetThisWord() {
|
||||
assertEquals(EditingUtils.getThisWord("abc def", sSeparators), "def");
|
||||
assertEquals(EditingUtils.getThisWord("abc def ", sSeparators), "def");
|
||||
assertNull(EditingUtils.getThisWord("abc def.", sSeparators));
|
||||
assertNull(EditingUtils.getThisWord("abc def .", sSeparators));
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue