From aec44d50a7534d8704a7006b4f90f5e8040a931b Mon Sep 17 00:00:00 2001 From: Kurt Partridge Date: Wed, 9 May 2012 16:04:26 -0700 Subject: [PATCH] include text context in researchLogger when logging LatinIME.onUpdateSelection(), now include the current word and preceding word. no escaping of the word is performed; this is temporary until the output format is cleaned up. also fix EditingUtils.getWordRangeAtCursor to support supplementary UTF-16 characters. Bug: 6188932 Change-Id: If4612a2627537d5d8bb2f9585a3ad1b4e56c2e26 --- .../inputmethod/latin/EditingUtils.java | 52 +++++- .../android/inputmethod/latin/LatinIME.java | 2 +- .../inputmethod/latin/ResearchLogger.java | 13 +- .../inputmethod/latin/EditingUtilsTests.java | 161 ++++++++++++++++++ .../android/inputmethod/latin/UtilsTests.java | 65 ------- 5 files changed, 215 insertions(+), 78 deletions(-) create mode 100644 tests/src/com/android/inputmethod/latin/EditingUtilsTests.java delete mode 100644 tests/src/com/android/inputmethod/latin/UtilsTests.java diff --git a/java/src/com/android/inputmethod/latin/EditingUtils.java b/java/src/com/android/inputmethod/latin/EditingUtils.java index 93106ac27..be4cb7787 100644 --- a/java/src/com/android/inputmethod/latin/EditingUtils.java +++ b/java/src/com/android/inputmethod/latin/EditingUtils.java @@ -54,7 +54,7 @@ public class EditingUtils { */ public static String getWordAtCursor(InputConnection connection, String separators) { // getWordRangeAtCursor returns null if the connection is null - Range r = getWordRangeAtCursor(connection, separators); + Range r = getWordRangeAtCursor(connection, separators, 0); return (r == null) ? null : r.mWord; } @@ -84,7 +84,17 @@ public class EditingUtils { } } - private static Range getWordRangeAtCursor(InputConnection connection, String sep) { + /** + * Returns the text surrounding the cursor. + * + * @param connection the InputConnection to the TextView + * @param sep a string of characters that split words. + * @param additionalPrecedingWordsCount the number of words before the current word that should + * be included in the returned range + * @return a range containing the text surrounding the cursor + */ + public static Range getWordRangeAtCursor(InputConnection connection, String sep, + int additionalPrecedingWordsCount) { if (connection == null || sep == null) { return null; } @@ -94,14 +104,40 @@ public class EditingUtils { return null; } - // Find first word separator before the cursor + // Going backward, alternate skipping non-separators and separators until enough words + // have been read. int start = before.length(); - while (start > 0 && !isWhitespace(before.charAt(start - 1), sep)) start--; + boolean isStoppingAtWhitespace = true; // toggles to indicate what to stop at + while (true) { // see comments below for why this is guaranteed to halt + while (start > 0) { + final int codePoint = Character.codePointBefore(before, start); + if (isStoppingAtWhitespace == isSeparator(codePoint, sep)) { + break; // inner loop + } + --start; + if (Character.isSupplementaryCodePoint(codePoint)) { + --start; + } + } + // isStoppingAtWhitespace is true every other time through the loop, + // so additionalPrecedingWordsCount is guaranteed to become < 0, which + // guarantees outer loop termination + if (isStoppingAtWhitespace && (--additionalPrecedingWordsCount < 0)) { + break; // outer loop + } + isStoppingAtWhitespace = !isStoppingAtWhitespace; + } // Find last word separator after the cursor int end = -1; - while (++end < after.length() && !isWhitespace(after.charAt(end), sep)) { - // Nothing to do here. + while (++end < after.length()) { + final int codePoint = Character.codePointAt(after, end); + if (isSeparator(codePoint, sep)) { + break; + } + if (Character.isSupplementaryCodePoint(codePoint)) { + ++end; + } } int cursor = getCursorPosition(connection); @@ -114,8 +150,8 @@ public class EditingUtils { return null; } - private static boolean isWhitespace(int code, String whitespace) { - return whitespace.contains(String.valueOf((char) code)); + private static boolean isSeparator(int code, String sep) { + return sep.indexOf(code) != -1; } private static final Pattern spaceRegex = Pattern.compile("\\s+"); diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index e8211aecf..b7385ed21 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -755,7 +755,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen ResearchLogger.latinIME_onUpdateSelection(mLastSelectionStart, mLastSelectionEnd, oldSelStart, oldSelEnd, newSelStart, newSelEnd, composingSpanStart, composingSpanEnd, mExpectingUpdateSelection, - expectingUpdateSelectionFromLogger); + expectingUpdateSelectionFromLogger, getCurrentInputConnection()); if (expectingUpdateSelectionFromLogger) { return; } diff --git a/java/src/com/android/inputmethod/latin/ResearchLogger.java b/java/src/com/android/inputmethod/latin/ResearchLogger.java index 92a9633de..a46ed03af 100644 --- a/java/src/com/android/inputmethod/latin/ResearchLogger.java +++ b/java/src/com/android/inputmethod/latin/ResearchLogger.java @@ -36,6 +36,7 @@ import com.android.inputmethod.keyboard.Key; import com.android.inputmethod.keyboard.KeyDetector; import com.android.inputmethod.keyboard.Keyboard; import com.android.inputmethod.keyboard.internal.KeyboardState; +import com.android.inputmethod.latin.EditingUtils.Range; import com.android.inputmethod.latin.define.ProductionFlag; import java.io.BufferedWriter; @@ -64,6 +65,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang private static final String PREF_USABILITY_STUDY_MODE = "usability_study_mode"; private static final String PREF_RESEARCH_LOGGER_UUID_STRING = "pref_research_logger_uuid"; private static final boolean DEBUG = false; + private static final String WHITESPACE_SEPARATORS = " \t\n\r"; private static final ResearchLogger sInstance = new ResearchLogger(new LogFileManager()); private static final int MAX_INPUTVIEW_LENGTH_TO_CAPTURE = 8192; // must be >=1 @@ -558,9 +560,10 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang logUnstructured("LatinIME_onWindowHidden", ""); } else { if (charSequence.length() > MAX_INPUTVIEW_LENGTH_TO_CAPTURE) { - // do not cut in the middle of a supplementary character int length = MAX_INPUTVIEW_LENGTH_TO_CAPTURE; - if (!Character.isLetter(charSequence.charAt(length))) { + // do not cut in the middle of a supplementary character + final char c = charSequence.charAt(length-1); + if (Character.isHighSurrogate(c)) { length--; } final CharSequence truncatedCharSequence = charSequence.subSequence(0, @@ -614,7 +617,7 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang final int lastSelectionEnd, final int oldSelStart, final int oldSelEnd, final int newSelStart, final int newSelEnd, final int composingSpanStart, final int composingSpanEnd, final boolean expectingUpdateSelection, - final boolean expectingUpdateSelectionFromLogger) { + final boolean expectingUpdateSelectionFromLogger, final InputConnection connection) { if (UnsLogGroup.LATINIME_ONUPDATESELECTION_ENABLED) { final String s = "onUpdateSelection: oss=" + oldSelStart + ", ose=" + oldSelEnd @@ -625,7 +628,9 @@ public class ResearchLogger implements SharedPreferences.OnSharedPreferenceChang + ", cs=" + composingSpanStart + ", ce=" + composingSpanEnd + ", eus=" + expectingUpdateSelection - + ", eusfl=" + expectingUpdateSelectionFromLogger; + + ", eusfl=" + expectingUpdateSelectionFromLogger + + ", context=\"" + EditingUtils.getWordRangeAtCursor(connection, + WHITESPACE_SEPARATORS, 1).mWord + "\""; logUnstructured("LatinIME_onUpdateSelection", s); } } diff --git a/tests/src/com/android/inputmethod/latin/EditingUtilsTests.java b/tests/src/com/android/inputmethod/latin/EditingUtilsTests.java new file mode 100644 index 000000000..c73f8891f --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/EditingUtilsTests.java @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.test.AndroidTestCase; +import android.view.inputmethod.ExtractedText; +import android.view.inputmethod.ExtractedTextRequest; +import android.view.inputmethod.InputConnection; +import android.view.inputmethod.InputConnectionWrapper; + +import com.android.inputmethod.latin.EditingUtils.Range; + +public class EditingUtilsTests extends AndroidTestCase { + + // The following is meant to be a reasonable default for + // the "word_separators" resource. + private static final String sSeparators = ".,:;!?-"; + + @Override + protected void setUp() throws Exception { + super.setUp(); + } + + private class MockConnection extends InputConnectionWrapper { + final String mTextBefore; + final String mTextAfter; + final ExtractedText mExtractedText; + + public MockConnection(String textBefore, String textAfter, ExtractedText extractedText) { + super(null, false); + mTextBefore = textBefore; + mTextAfter = textAfter; + mExtractedText = extractedText; + } + + /* (non-Javadoc) + * @see android.view.inputmethod.InputConnectionWrapper#getTextBeforeCursor(int, int) + */ + @Override + public CharSequence getTextBeforeCursor(int n, int flags) { + return mTextBefore; + } + + /* (non-Javadoc) + * @see android.view.inputmethod.InputConnectionWrapper#getTextAfterCursor(int, int) + */ + @Override + public CharSequence getTextAfterCursor(int n, int flags) { + return mTextAfter; + } + + /* (non-Javadoc) + * @see android.view.inputmethod.InputConnectionWrapper#getExtractedText(ExtractedTextRequest, int) + */ + @Override + public ExtractedText getExtractedText(ExtractedTextRequest request, int flags) { + return mExtractedText; + } + } + + /************************** Tests ************************/ + + /** + * Test for getting previous word (for bigram suggestions) + */ + public void testGetPreviousWord() { + // If one of the following cases breaks, the bigram suggestions won't work. + assertEquals(EditingUtils.getPreviousWord("abc def", sSeparators), "abc"); + assertNull(EditingUtils.getPreviousWord("abc", sSeparators)); + assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators)); + + // The following tests reflect the current behavior of the function + // EditingUtils#getPreviousWord. + // TODO: However at this time, the code does never go + // into such a path, so it should be safe to change the behavior of + // this function if needed - especially since it does not seem very + // logical. These tests are just there to catch any unintentional + // changes in the behavior of the EditingUtils#getPreviousWord method. + assertEquals(EditingUtils.getPreviousWord("abc def ", sSeparators), "abc"); + assertEquals(EditingUtils.getPreviousWord("abc def.", sSeparators), "abc"); + assertEquals(EditingUtils.getPreviousWord("abc def .", sSeparators), "def"); + assertNull(EditingUtils.getPreviousWord("abc ", sSeparators)); + } + + /** + * Test for getting the word before the cursor (for bigram) + */ + public void testGetThisWord() { + assertEquals(EditingUtils.getThisWord("abc def", sSeparators), "def"); + assertEquals(EditingUtils.getThisWord("abc def ", sSeparators), "def"); + assertNull(EditingUtils.getThisWord("abc def.", sSeparators)); + assertNull(EditingUtils.getThisWord("abc def .", sSeparators)); + } + + /** + * Test logic in getting the word range at the cursor. + */ + public void testGetWordRangeAtCursor() { + ExtractedText et = new ExtractedText(); + InputConnection mockConnection; + mockConnection = new MockConnection("word wo", "rd", et); + et.startOffset = 0; + et.selectionStart = 7; + Range r; + + // basic case + r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 0); + assertEquals("word", r.mWord); + r = null; + + // more than one word + r = EditingUtils.getWordRangeAtCursor(mockConnection, " ", 1); + assertEquals("word word", r.mWord); + r = null; + + // tab character instead of space + mockConnection = new MockConnection("one\tword\two", "rd", et); + r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1); + assertEquals("word\tword", r.mWord); + r = null; + + // only one word doesn't go too far + mockConnection = new MockConnection("one\tword\two", "rd", et); + r = EditingUtils.getWordRangeAtCursor(mockConnection, "\t", 1); + assertEquals("word\tword", r.mWord); + r = null; + + // tab or space + mockConnection = new MockConnection("one word\two", "rd", et); + r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 1); + assertEquals("word\tword", r.mWord); + r = null; + + // tab or space multiword + mockConnection = new MockConnection("one word\two", "rd", et); + r = EditingUtils.getWordRangeAtCursor(mockConnection, " \t", 2); + assertEquals("one word\tword", r.mWord); + r = null; + + // splitting on supplementary character + final String supplementaryChar = "\uD840\uDC8A"; + mockConnection = new MockConnection("one word" + supplementaryChar + "wo", "rd", et); + r = EditingUtils.getWordRangeAtCursor(mockConnection, supplementaryChar, 0); + assertEquals("word", r.mWord); + r = null; + } +} diff --git a/tests/src/com/android/inputmethod/latin/UtilsTests.java b/tests/src/com/android/inputmethod/latin/UtilsTests.java deleted file mode 100644 index 2ef4e2ff5..000000000 --- a/tests/src/com/android/inputmethod/latin/UtilsTests.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2010,2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -package com.android.inputmethod.latin; - -import android.test.AndroidTestCase; - -public class UtilsTests extends AndroidTestCase { - - // The following is meant to be a reasonable default for - // the "word_separators" resource. - private static final String sSeparators = ".,:;!?-"; - - @Override - protected void setUp() throws Exception { - super.setUp(); - } - - /************************** Tests ************************/ - - /** - * Test for getting previous word (for bigram suggestions) - */ - public void testGetPreviousWord() { - // If one of the following cases breaks, the bigram suggestions won't work. - assertEquals(EditingUtils.getPreviousWord("abc def", sSeparators), "abc"); - assertNull(EditingUtils.getPreviousWord("abc", sSeparators)); - assertNull(EditingUtils.getPreviousWord("abc. def", sSeparators)); - - // The following tests reflect the current behavior of the function - // EditingUtils#getPreviousWord. - // TODO: However at this time, the code does never go - // into such a path, so it should be safe to change the behavior of - // this function if needed - especially since it does not seem very - // logical. These tests are just there to catch any unintentional - // changes in the behavior of the EditingUtils#getPreviousWord method. - assertEquals(EditingUtils.getPreviousWord("abc def ", sSeparators), "abc"); - assertEquals(EditingUtils.getPreviousWord("abc def.", sSeparators), "abc"); - assertEquals(EditingUtils.getPreviousWord("abc def .", sSeparators), "def"); - assertNull(EditingUtils.getPreviousWord("abc ", sSeparators)); - } - - /** - * Test for getting the word before the cursor (for bigram) - */ - public void testGetThisWord() { - assertEquals(EditingUtils.getThisWord("abc def", sSeparators), "def"); - assertEquals(EditingUtils.getThisWord("abc def ", sSeparators), "def"); - assertNull(EditingUtils.getThisWord("abc def.", sSeparators)); - assertNull(EditingUtils.getThisWord("abc def .", sSeparators)); - } -}