am 1a2f3c44: Merge "Move getPrevWordsInfoFromNthPreviousWord to StringUtils."

* commit '1a2f3c4433d154c07c5017066c5dfe0f0070d520':
  Move getPrevWordsInfoFromNthPreviousWord to StringUtils.
main
Keisuke Kuroyanagi 2014-06-27 09:29:50 +00:00 committed by Android Git Automerger
commit a1923ce777
3 changed files with 99 additions and 97 deletions

View File

@ -26,7 +26,6 @@ import android.view.inputmethod.ExtractedText;
import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.ExtractedTextRequest;
import android.view.inputmethod.InputConnection; import android.view.inputmethod.InputConnection;
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.CapsModeUtils;
import com.android.inputmethod.latin.utils.DebugLogUtils; import com.android.inputmethod.latin.utils.DebugLogUtils;
@ -35,7 +34,6 @@ import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.TextRange; import com.android.inputmethod.latin.utils.TextRange;
import java.util.Arrays; import java.util.Arrays;
import java.util.regex.Pattern;
/** /**
* Enrichment class for InputConnection to simplify interaction and add functionality. * Enrichment class for InputConnection to simplify interaction and add functionality.
@ -54,7 +52,6 @@ public final class RichInputConnection {
private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH
* (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */ * (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */
+ Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */; + Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */;
private static final Pattern spaceRegex = Pattern.compile("\\s+");
private static final int INVALID_CURSOR_POSITION = -1; private static final int INVALID_CURSOR_POSITION = -1;
/** /**
@ -540,85 +537,13 @@ public final class RichInputConnection {
} }
} }
} }
return getPrevWordsInfoFromNthPreviousWord(prev, spacingAndPunctuations, n); return StringUtils.getPrevWordsInfoFromNthPreviousWord(prev, spacingAndPunctuations, n);
} }
private static boolean isSeparator(final int code, final int[] sortedSeparators) { private static boolean isSeparator(final int code, final int[] sortedSeparators) {
return Arrays.binarySearch(sortedSeparators, code) >= 0; return Arrays.binarySearch(sortedSeparators, code) >= 0;
} }
// Get context information from nth word before the cursor. n = 1 retrieves the words
// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
// on whitespace only.
// Also, it won't return words that end in a separator (if the nth word before the cursor
// ends in a separator, it returns information representing beginning-of-sentence).
// Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
// (n = 1) "abc def|" -> abc, def
// (n = 1) "abc def |" -> abc, def
// (n = 1) "abc 'def|" -> empty, 'def
// (n = 1) "abc def. |" -> beginning-of-sentence
// (n = 1) "abc def . |" -> beginning-of-sentence
// (n = 2) "abc def|" -> beginning-of-sentence, abc
// (n = 2) "abc def |" -> beginning-of-sentence, abc
// (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
// represent this situation using PrevWordsInfo. See TODO in the method.
// TODO: The next example's result should be "abc, def". This has to be fixed before we
// retrieve the prior context of Beginning-of-Sentence.
// (n = 2) "abc def. |" -> beginning-of-sentence, abc
// (n = 2) "abc def . |" -> abc, def
// (n = 2) "abc|" -> beginning-of-sentence
// (n = 2) "abc |" -> beginning-of-sentence
// (n = 2) "abc. def|" -> beginning-of-sentence
/**
 * Builds the previous-words context starting at the nth whitespace-delimited word before the
 * end of {@code prev}.
 *
 * @param prev the text to inspect; when null, {@link PrevWordsInfo#EMPTY_PREV_WORDS_INFO} is
 *        returned
 * @param spacingAndPunctuations classifier used to recognize sentence separators, word
 *        separators and word connectors
 * @param n 1 to start from the last word of {@code prev}, 2 for the word before it, and so on
 * @return a PrevWordsInfo wrapping up to Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM entries;
 *         filling stops at the first slot that receives a beginning-of-sentence or empty marker
 */
public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev,
        final SpacingAndPunctuations spacingAndPunctuations, final int n) {
    if (prev == null) {
        return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
    }
    final String[] tokens = spaceRegex.split(prev);
    final int tokenCount = tokens.length;
    final WordInfo[] context = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    int slot = 0;
    while (slot < context.length) {
        final int target = tokenCount - n - slot;
        final int successor = target + 1;
        // Look at the token right after the target first: if it begins with a word
        // connector, the context is unclear and we stop with an empty marker.
        // TODO: Return meaningful context for this case.
        final boolean hasSuccessor = successor >= 0 && successor < tokenCount;
        if (hasSuccessor && !tokens[successor].isEmpty()
                && spacingAndPunctuations.isWordConnector(tokens[successor].charAt(0))) {
            context[slot] = WordInfo.EMPTY_WORD_INFO;
            break;
        }
        // Running out of tokens, or hitting an empty token, means beginning-of-sentence.
        if (target < 0 || tokens[target].isEmpty()) {
            context[slot] = WordInfo.BEGINNING_OF_SENTENCE;
            break;
        }
        final String candidate = tokens[target];
        final char tail = candidate.charAt(candidate.length() - 1);
        // A token ending in a sentence separator also marks beginning-of-sentence.
        if (spacingAndPunctuations.isSentenceSeparator(tail)) {
            context[slot] = WordInfo.BEGINNING_OF_SENTENCE;
            break;
        }
        // A token ending in a word separator or connector gives unclear context.
        // TODO: Return meaningful context for this case.
        if (spacingAndPunctuations.isWordSeparator(tail)
                || spacingAndPunctuations.isWordConnector(tail)) {
            context[slot] = WordInfo.EMPTY_WORD_INFO;
            break;
        }
        context[slot] = new WordInfo(candidate);
        slot++;
    }
    return new PrevWordsInfo(context);
}
/** /**
* @param sortedSeparators a sorted array of code points which may separate words * @param sortedSeparators a sorted array of code points which may separate words
* @return the word that surrounds the cursor, including up to one trailing * @return the word that surrounds the cursor, including up to one trailing

View File

@ -22,10 +22,14 @@ import android.text.TextUtils;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Locale; import java.util.Locale;
import java.util.regex.Pattern;
public final class StringUtils { public final class StringUtils {
public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
@ -567,4 +571,77 @@ public final class StringUtils {
return sb + "]"; return sb + "]";
} }
} }
private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
// Get context information from nth word before the cursor. n = 1 retrieves the words
// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
// on whitespace only.
// Also, it won't return words that end in a separator (if the nth word before the cursor
// ends in a separator, it returns information representing beginning-of-sentence).
// Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
// (n = 1) "abc def|" -> abc, def
// (n = 1) "abc def |" -> abc, def
// (n = 1) "abc 'def|" -> empty, 'def
// (n = 1) "abc def. |" -> beginning-of-sentence
// (n = 1) "abc def . |" -> beginning-of-sentence
// (n = 2) "abc def|" -> beginning-of-sentence, abc
// (n = 2) "abc def |" -> beginning-of-sentence, abc
// (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
// represent this situation using PrevWordsInfo. See TODO in the method.
// TODO: The next example's result should be "abc, def". This has to be fixed before we
// retrieve the prior context of Beginning-of-Sentence.
// (n = 2) "abc def. |" -> beginning-of-sentence, abc
// (n = 2) "abc def . |" -> abc, def
// (n = 2) "abc|" -> beginning-of-sentence
// (n = 2) "abc |" -> beginning-of-sentence
// (n = 2) "abc. def|" -> beginning-of-sentence
/**
 * Builds the previous-words context starting at the nth whitespace-delimited word before the
 * end of {@code prev}.
 *
 * @param prev the text to inspect; when null, {@link PrevWordsInfo#EMPTY_PREV_WORDS_INFO} is
 *        returned
 * @param spacingAndPunctuations classifier used to recognize sentence separators, word
 *        separators and word connectors
 * @param n 1 to start from the last word of {@code prev}, 2 for the word before it, and so on
 * @return a PrevWordsInfo wrapping up to Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM entries;
 *         filling stops at the first slot that receives a beginning-of-sentence or empty marker
 */
public static PrevWordsInfo getPrevWordsInfoFromNthPreviousWord(final CharSequence prev,
        final SpacingAndPunctuations spacingAndPunctuations, final int n) {
    if (prev == null) {
        return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
    }
    final String[] tokens = SPACE_REGEX.split(prev);
    final int tokenCount = tokens.length;
    final WordInfo[] context = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    int slot = 0;
    while (slot < context.length) {
        final int target = tokenCount - n - slot;
        final int successor = target + 1;
        // Look at the token right after the target first: if it begins with a word
        // connector, the context is unclear and we stop with an empty marker.
        // TODO: Return meaningful context for this case.
        final boolean hasSuccessor = successor >= 0 && successor < tokenCount;
        if (hasSuccessor && !tokens[successor].isEmpty()
                && spacingAndPunctuations.isWordConnector(tokens[successor].charAt(0))) {
            context[slot] = WordInfo.EMPTY_WORD_INFO;
            break;
        }
        // Running out of tokens, or hitting an empty token, means beginning-of-sentence.
        if (target < 0 || tokens[target].isEmpty()) {
            context[slot] = WordInfo.BEGINNING_OF_SENTENCE;
            break;
        }
        final String candidate = tokens[target];
        final char tail = candidate.charAt(candidate.length() - 1);
        // A token ending in a sentence separator also marks beginning-of-sentence.
        if (spacingAndPunctuations.isSentenceSeparator(tail)) {
            context[slot] = WordInfo.BEGINNING_OF_SENTENCE;
            break;
        }
        // A token ending in a word separator or connector gives unclear context.
        // TODO: Return meaningful context for this case.
        if (spacingAndPunctuations.isWordSeparator(tail)
                || spacingAndPunctuations.isWordConnector(tail)) {
            context[slot] = WordInfo.EMPTY_WORD_INFO;
            break;
        }
        context[slot] = new WordInfo(candidate);
        slot++;
    }
    return new PrevWordsInfo(context);
}
} }

View File

@ -156,24 +156,24 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
*/ */
public void testGetPreviousWord() { public void testGetPreviousWord() {
// If one of the following cases breaks, the bigram suggestions won't work. // If one of the following cases breaks, the bigram suggestions won't work.
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); "abc", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc. def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); "abc. def", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertFalse(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertFalse(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertTrue(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); "abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
// For n-gram // For n-gram
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc"); "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1], "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1],
WordInfo.BEGINNING_OF_SENTENCE); WordInfo.BEGINNING_OF_SENTENCE);
@ -184,32 +184,32 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
// this function if needed - especially since it does not seem very // this function if needed - especially since it does not seem very
// logical. These tests are just there to catch any unintentional // logical. These tests are just there to catch any unintentional
// changes in the behavior of the RichInputConnection#getPreviousWord method. // changes in the behavior of the RichInputConnection#getPreviousWord method.
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def ", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); "abc def ", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def.", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc"); "abc def.", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "abc");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def .", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "def"); "abc def .", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE); "abc ", mSpacingAndPunctuations, 2), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def ", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); "abc def ", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc 'def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "'def"); "abc 'def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "'def");
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); "abc def.", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE); "abc def .", mSpacingAndPunctuations, 1), PrevWordsInfo.BEGINNING_OF_SENTENCE);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); "abc, def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); "abc? def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); "abc! def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( assertEquals(StringUtils.getPrevWordsInfoFromNthPreviousWord(
"abc 'def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO); "abc 'def", mSpacingAndPunctuations, 2), PrevWordsInfo.EMPTY_PREV_WORDS_INFO);
} }