From bb843eb223ce0f8fb1088ed3393a4165123ddb1f Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 7 Jul 2014 13:07:57 +0900 Subject: [PATCH] Move getPrevWordsInfoFromNthPreviousWord to PrevWordsInfoUtils. Bug: 14425059 Change-Id: Id37022ac6c1545d6845abfbcdb7ed47f0e250eec --- .../latin/RichInputConnection.java | 79 +------------- .../latin/utils/PrevWordsInfoUtils.java | 103 ++++++++++++++++++ .../RichInputConnectionAndTextRangeTests.java | 43 ++++---- 3 files changed, 128 insertions(+), 97 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index fdd47a40f..a6b3b710b 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -26,17 +26,16 @@ import android.view.inputmethod.ExtractedText; import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.InputConnection; -import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.DebugLogUtils; +import com.android.inputmethod.latin.utils.PrevWordsInfoUtils; import com.android.inputmethod.latin.utils.ScriptUtils; import com.android.inputmethod.latin.utils.SpannableStringUtils; import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.TextRange; import java.util.Arrays; -import java.util.regex.Pattern; /** * Enrichment class for InputConnection to simplify interaction and add functionality. 
@@ -55,7 +54,6 @@ public final class RichInputConnection { private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH * (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */ + Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */; - private static final Pattern spaceRegex = Pattern.compile("\\s+"); private static final int INVALID_CURSOR_POSITION = -1; /** @@ -541,85 +539,14 @@ public final class RichInputConnection { } } } - return getPrevWordsInfoFromNthPreviousWord(prev, spacingAndPunctuations, n); + return PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( + prev, spacingAndPunctuations, n); } private static boolean isSeparator(final int code, final int[] sortedSeparators) { return Arrays.binarySearch(sortedSeparators, code) >= 0; } - // Get context information from nth word before the cursor. n = 1 retrieves the words - // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits - // on whitespace only. - // Also, it won't return words that end in a separator (if the nth word before the cursor - // ends in a separator, it returns information representing beginning-of-sentence). - // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2): - // (n = 1) "abc def|" -> abc, def - // (n = 1) "abc def |" -> abc, def - // (n = 1) "abc 'def|" -> empty, 'def - // (n = 1) "abc def. |" -> beginning-of-sentence - // (n = 1) "abc def . |" -> beginning-of-sentence - // (n = 2) "abc def|" -> beginning-of-sentence, abc - // (n = 2) "abc def |" -> beginning-of-sentence, abc - // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot - // represent this situation using PrevWordsInfo. See TODO in the method. - // TODO: The next example's result should be "abc, def". This have to be fixed before we - // retrieve the prior context of Beginning-of-Sentence. - // (n = 2) "abc def. |" -> beginning-of-sentence, abc - // (n = 2) "abc def . 
|" -> abc, def - // (n = 2) "abc|" -> beginning-of-sentence - // (n = 2) "abc |" -> beginning-of-sentence - // (n = 2) "abc. def|" -> beginning-of-sentence - public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev, - final SpacingAndPunctuations spacingAndPunctuations, final int n) { - if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; - final String[] w = spaceRegex.split(prev); - final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - for (int i = 0; i < prevWordsInfo.length; i++) { - final int focusedWordIndex = w.length - n - i; - // Referring to the word after the focused word. - if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) { - final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1]; - if (!wordFollowingTheNthPrevWord.isEmpty()) { - final char firstChar = wordFollowingTheNthPrevWord.charAt(0); - if (spacingAndPunctuations.isWordConnector(firstChar)) { - // The word following the focused word is starting with a word connector. - // TODO: Return meaningful context for this case. - prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; - break; - } - } - } - // If we can't find (n + i) words, the context is beginning-of-sentence. - if (focusedWordIndex < 0) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; - break; - } - final String focusedWord = w[focusedWordIndex]; - // If the word is empty, the context is beginning-of-sentence. - final int length = focusedWord.length(); - if (length <= 0) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; - break; - } - // If ends in a sentence separator, the context is beginning-of-sentence. - final char lastChar = focusedWord.charAt(length - 1); - if (spacingAndPunctuations.isSentenceSeparator(lastChar)) { - prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; - break; - } - // If ends in a word separator or connector, the context is unclear. - // TODO: Return meaningful context for this case. 
- if (spacingAndPunctuations.isWordSeparator(lastChar) - || spacingAndPunctuations.isWordConnector(lastChar)) { - prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; - break; - } - prevWordsInfo[i] = new WordInfo(focusedWord); - } - return new PrevWordsInfo(prevWordsInfo); - } - /** * Returns the text surrounding the cursor. * diff --git a/java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java b/java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java new file mode 100644 index 000000000..3cd63612c --- /dev/null +++ b/java/src/com/android/inputmethod/latin/utils/PrevWordsInfoUtils.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.utils; + +import java.util.regex.Pattern; + +import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.PrevWordsInfo; +import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; +import com.android.inputmethod.latin.settings.SpacingAndPunctuations; + +public final class PrevWordsInfoUtils { + private PrevWordsInfoUtils() { + // Intentional empty constructor for utility class. + } + + private static final Pattern SPACE_REGEX = Pattern.compile("\\s+"); + // Get context information from nth word before the cursor. n = 1 retrieves the words + // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits + // on whitespace only. 
+ // Also, it won't return words that end in a separator (if the nth word before the cursor + // ends in a separator, it returns information representing beginning-of-sentence). + // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2): + // (n = 1) "abc def|" -> abc, def + // (n = 1) "abc def |" -> abc, def + // (n = 1) "abc 'def|" -> empty, 'def + // (n = 1) "abc def. |" -> beginning-of-sentence + // (n = 1) "abc def . |" -> beginning-of-sentence + // (n = 2) "abc def|" -> beginning-of-sentence, abc + // (n = 2) "abc def |" -> beginning-of-sentence, abc + // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot + // represent this situation using PrevWordsInfo. See TODO in the method. + // TODO: The next example's result should be "abc, def". This has to be fixed before we + // retrieve the prior context of Beginning-of-Sentence. + // (n = 2) "abc def. |" -> beginning-of-sentence, abc + // (n = 2) "abc def . |" -> abc, def + // (n = 2) "abc|" -> beginning-of-sentence + // (n = 2) "abc |" -> beginning-of-sentence + // (n = 2) "abc. def|" -> beginning-of-sentence + public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev, + final SpacingAndPunctuations spacingAndPunctuations, final int n) { + if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + final String[] w = SPACE_REGEX.split(prev); + final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + for (int i = 0; i < prevWordsInfo.length; i++) { + final int focusedWordIndex = w.length - n - i; + // Referring to the word after the focused word. 
+ if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) { + final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1]; + if (!wordFollowingTheNthPrevWord.isEmpty()) { + final char firstChar = wordFollowingTheNthPrevWord.charAt(0); + if (spacingAndPunctuations.isWordConnector(firstChar)) { + // The word following the focused word is starting with a word connector. + // TODO: Return meaningful context for this case. + prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; + break; + } + } + } + // If we can't find (n + i) words, the context is beginning-of-sentence. + if (focusedWordIndex < 0) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + final String focusedWord = w[focusedWordIndex]; + // If the word is empty, the context is beginning-of-sentence. + final int length = focusedWord.length(); + if (length <= 0) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + // If ends in a sentence separator, the context is beginning-of-sentence. + final char lastChar = focusedWord.charAt(length - 1); + if (spacingAndPunctuations.isSentenceSeparator(lastChar)) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + // If ends in a word separator or connector, the context is unclear. + // TODO: Return meaningful context for this case. 
+ if (spacingAndPunctuations.isWordSeparator(lastChar) + || spacingAndPunctuations.isWordConnector(lastChar)) { + prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; + break; + } + prevWordsInfo[i] = new WordInfo(focusedWord); + } + return new PrevWordsInfo(prevWordsInfo); + } +} diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java index 2d92e691b..199922491 100644 --- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java @@ -32,6 +32,7 @@ import android.view.inputmethod.InputConnectionWrapper; import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; +import com.android.inputmethod.latin.utils.PrevWordsInfoUtils; import com.android.inputmethod.latin.utils.RunInLocale; import com.android.inputmethod.latin.utils.ScriptUtils; import com.android.inputmethod.latin.utils.StringUtils; @@ -157,24 +158,24 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { */ public void testGetPreviousWord() { // If one of the following cases breaks, the bigram suggestions won't work. - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc. 
def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertFalse(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertFalse(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); - assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertTrue(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); // For n-gram - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1], WordInfo.BEGINNING_OF_SENTENCE); @@ -185,32 +186,32 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { // this function if needed - especially since it does not seem very // logical. These tests are just there to catch any unintentional // changes in the behavior of the RichInputConnection#getPreviousWord method. 
- assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def ", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def.", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def .", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "def"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def ", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc 'def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "'def"); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); - 
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); - assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + assertEquals(PrevWordsInfoUtils.getPrevWordsInfoFromNthPreviousWord( "abc 'def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); }