From 1c2f1ada8305e36defa8572da687a4596bf083ea Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 27 Jun 2014 17:59:21 +0900 Subject: [PATCH] Find multiple previous word information to support n-gram. Bug: 14425059 Change-Id: Ieace636334a9b2a094527341d4fcfc05958296c5 --- .../inputmethod/latin/PrevWordsInfo.java | 43 +++++-- .../latin/RichInputConnection.java | 105 ++++++++++-------- .../latin/inputlogic/InputLogic.java | 7 +- .../RichInputConnectionAndTextRangeTests.java | 11 ++ 4 files changed, 108 insertions(+), 58 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java index 5dda44445..f45c73f53 100644 --- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java +++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java @@ -27,7 +27,8 @@ import com.android.inputmethod.latin.utils.StringUtils; public class PrevWordsInfo { public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO = new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO); - public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo(); + public static final PrevWordsInfo BEGINNING_OF_SENTENCE = + new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE); /** * Word information used to represent previous words information. 
@@ -57,6 +58,24 @@ public class PrevWordsInfo { public boolean isValid() { return mWord != null; } + + @Override + public int hashCode() { + return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof WordInfo)) return false; + final WordInfo wordInfo = (WordInfo)o; + if (mWord == null || wordInfo.mWord == null) { + return mWord == wordInfo.mWord + && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; + } + return mWord.equals(wordInfo.mWord) + && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; + } } // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't @@ -67,16 +86,9 @@ public class PrevWordsInfo { // calling getSuggetions* in this situation. public WordInfo[] mPrevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - // Beginning of sentence. - public PrevWordsInfo() { - mPrevWordsInfo[0] = WordInfo.BEGINNING_OF_SENTENCE; - Arrays.fill(mPrevWordsInfo, 1 /* start */, mPrevWordsInfo.length, WordInfo.EMPTY_WORD_INFO); - } - // Construct from the previous word information. public PrevWordsInfo(final WordInfo prevWordInfo) { mPrevWordsInfo[0] = prevWordInfo; - Arrays.fill(mPrevWordsInfo, 1 /* start */, mPrevWordsInfo.length, WordInfo.EMPTY_WORD_INFO); } // Construct from WordInfo array. n-th element represents (n+1)-th previous word's information. 
@@ -115,6 +127,19 @@ public class PrevWordsInfo { } } + @Override + public int hashCode() { + return Arrays.hashCode(mPrevWordsInfo); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof PrevWordsInfo)) return false; + final PrevWordsInfo prevWordsInfo = (PrevWordsInfo)o; + return Arrays.equals(mPrevWordsInfo, prevWordsInfo.mPrevWordsInfo); + } + @Override public String toString() { final StringBuffer builder = new StringBuffer(); @@ -123,7 +148,7 @@ public class PrevWordsInfo { builder.append("PrevWord["); builder.append(i); builder.append("]: "); - if (!wordInfo.isValid()) { + if (wordInfo == null || !wordInfo.isValid()) { builder.append("Empty. "); continue; } diff --git a/java/src/com/android/inputmethod/latin/RichInputConnection.java b/java/src/com/android/inputmethod/latin/RichInputConnection.java index 3be6bccc6..8f252bd84 100644 --- a/java/src/com/android/inputmethod/latin/RichInputConnection.java +++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java @@ -26,6 +26,7 @@ import android.view.inputmethod.ExtractedText; import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.InputConnection; +import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.DebugLogUtils; @@ -49,8 +50,10 @@ public final class RichInputConnection { private static final boolean DBG = false; private static final boolean DEBUG_PREVIOUS_TEXT = false; private static final boolean DEBUG_BATCH_NESTING = false; - // Provision for a long word pair and a separator - private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH * 2 + 1; + // Provision for long words and separators between the words. 
+ private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH + * (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */ + + Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */; private static final Pattern spaceRegex = Pattern.compile("\\s+"); private static final int INVALID_CURSOR_POSITION = -1; @@ -544,22 +547,25 @@ public final class RichInputConnection { return Arrays.binarySearch(sortedSeparators, code) >= 0; } - // Get information of the nth word before cursor. n = 1 retrieves the word immediately before - // the cursor, n = 2 retrieves the word before that, and so on. This splits on whitespace only. + // Get context information from nth word before the cursor. n = 1 retrieves the words + // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits + // on whitespace only. // Also, it won't return words that end in a separator (if the nth word before the cursor // ends in a separator, it returns information representing beginning-of-sentence). - // Example : - // (n = 1) "abc def|" -> def - // (n = 1) "abc def |" -> def - // (n = 1) "abc 'def|" -> 'def + // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2): + // (n = 1) "abc def|" -> abc, def + // (n = 1) "abc def |" -> abc, def + // (n = 1) "abc 'def|" -> empty, 'def // (n = 1) "abc def. |" -> beginning-of-sentence // (n = 1) "abc def . |" -> beginning-of-sentence - // (n = 2) "abc def|" -> abc - // (n = 2) "abc def |" -> abc + // (n = 2) "abc def|" -> beginning-of-sentence, abc + // (n = 2) "abc def |" -> beginning-of-sentence, abc // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot // represent this situation using PrevWordsInfo. See TODO in the method. - // (n = 2) "abc def. |" -> abc - // (n = 2) "abc def . |" -> def + // TODO: The next example's result should be "abc, def". This has to be fixed before we + // retrieve the prior context of Beginning-of-Sentence. + // (n = 2) "abc def. 
|" -> beginning-of-sentence, abc + // (n = 2) "abc def . |" -> abc, def // (n = 2) "abc|" -> beginning-of-sentence // (n = 2) "abc |" -> beginning-of-sentence // (n = 2) "abc. def|" -> beginning-of-sentence @@ -567,43 +573,50 @@ public final class RichInputConnection { final SpacingAndPunctuations spacingAndPunctuations, final int n) { if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; final String[] w = spaceRegex.split(prev); - - // Referring to the word after the nth word. - if ((n - 1) > 0 && (n - 1) <= w.length) { - final String wordFollowingTheNthPrevWord = w[w.length - n + 1]; - if (!wordFollowingTheNthPrevWord.isEmpty()) { - final char firstChar = wordFollowingTheNthPrevWord.charAt(0); - if (spacingAndPunctuations.isWordConnector(firstChar)) { - // The word following the n-th prev word is starting with a word connector. - // TODO: Return meaningful context for this case. - return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; + final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + for (int i = 0; i < prevWordsInfo.length; i++) { + final int focusedWordIndex = w.length - n - i; + // Referring to the word after the focused word. + if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) { + final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1]; + if (!wordFollowingTheNthPrevWord.isEmpty()) { + final char firstChar = wordFollowingTheNthPrevWord.charAt(0); + if (spacingAndPunctuations.isWordConnector(firstChar)) { + // The word following the focused word is starting with a word connector. + // TODO: Return meaningful context for this case. + prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; + break; + } } } + // If we can't find (n + i) words, the context is beginning-of-sentence. + if (focusedWordIndex < 0) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + final String focusedWord = w[focusedWordIndex]; + // If the word is empty, the context is beginning-of-sentence. 
+ final int length = focusedWord.length(); + if (length <= 0) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + // If ends in a sentence separator, the context is beginning-of-sentence. + final char lastChar = focusedWord.charAt(length - 1); + if (spacingAndPunctuations.isSentenceSeparator(lastChar)) { + prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE; + break; + } + // If ends in a word separator or connector, the context is unclear. + // TODO: Return meaningful context for this case. + if (spacingAndPunctuations.isWordSeparator(lastChar) + || spacingAndPunctuations.isWordConnector(lastChar)) { + prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO; + break; + } + prevWordsInfo[i] = new WordInfo(focusedWord); } - - // If we can't find n words, or we found an empty word, the context is - // beginning-of-sentence. - if (w.length < n) { - return PrevWordsInfo.BEGINNING_OF_SENTENCE; - } - final String nthPrevWord = w[w.length - n]; - final int length = nthPrevWord.length(); - if (length <= 0) { - return PrevWordsInfo.BEGINNING_OF_SENTENCE; - } - - // If ends in a sentence separator, the context is beginning-of-sentence. - final char lastChar = nthPrevWord.charAt(length - 1); - if (spacingAndPunctuations.isSentenceSeparator(lastChar)) { - return PrevWordsInfo.BEGINNING_OF_SENTENCE; - } - // If ends in a word separator or connector, the context is unclear. - // TODO: Return meaningful context for this case. 
- if (spacingAndPunctuations.isWordSeparator(lastChar) - || spacingAndPunctuations.isWordConnector(lastChar)) { - return PrevWordsInfo.EMPTY_PREV_WORDS_INFO; - } - return new PrevWordsInfo(new PrevWordsInfo.WordInfo(nthPrevWord)); + return new PrevWordsInfo(prevWordsInfo); } /** diff --git a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java index c9d0dcf60..4ebdcbd70 100644 --- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java +++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java @@ -1882,10 +1882,11 @@ public final class InputLogic { final CharSequence chosenWordWithSuggestions = SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord, suggestedWords); - // Use the 2nd previous word as the previous word because the 1st previous word is the word - // to be committed. + // When we are composing a word, get previous words information from the 2nd previous word + // because the 1st previous word is the word to be committed. Otherwise get previous words + // information from the 1st previous word. final PrevWordsInfo prevWordsInfo = mConnection.getPrevWordsInfoFromNthPreviousWord( - settingsValues.mSpacingAndPunctuations, 2); + settingsValues.mSpacingAndPunctuations, mWordComposer.isComposingWord() ? 
2 : 1); mConnection.commitText(chosenWordWithSuggestions, 1); // Add the word to the user history dictionary performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWordsInfo); diff --git a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java index 5a5ec6d2b..ac52a412c 100644 --- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java +++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java @@ -30,6 +30,7 @@ import android.view.inputmethod.ExtractedTextRequest; import android.view.inputmethod.InputConnection; import android.view.inputmethod.InputConnectionWrapper; +import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.utils.RunInLocale; import com.android.inputmethod.latin.utils.StringUtils; @@ -166,6 +167,16 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase { "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( "abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence); + + // For n-gram + assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def"); + assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc"); + assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord( + "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1], + WordInfo.BEGINNING_OF_SENTENCE); + // The following tests reflect the current behavior of the function // RichInputConnection#getNthPreviousWord. // TODO: However at this time, the code does never go