Find multiple previous word information to support n-gram.

Bug: 14425059 Change-Id: Ieace636334a9b2a094527341d4fcfc05958296c5
2014-06-27 17:59:21 +09:00 · 2014-06-27 17:59:21 +09:00 · 1c2f1ada83
commit 1c2f1ada83
parent 05b1e0d42f
4 changed files with 108 additions and 58 deletions
--- a/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
+++ b/java/src/com/android/inputmethod/latin/PrevWordsInfo.java
@ -27,7 +27,8 @@ import com.android.inputmethod.latin.utils.StringUtils;
 public class PrevWordsInfo {
    public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO =
            new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO);
-    public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
+    public static final PrevWordsInfo BEGINNING_OF_SENTENCE =
            new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE);
    /**
     * Word information used to represent previous words information.
@ -57,6 +58,24 @@ public class PrevWordsInfo {
        public boolean isValid() {
            return mWord != null;
        }
        @Override
        public int hashCode() {
            // Hash exactly the two fields that equals() compares so the equals/hashCode
            // contract holds. Arrays.hashCode tolerates a null mWord.
            final Object[] identity = { mWord, mIsBeginningOfSentence };
            return Arrays.hashCode(identity);
        }
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof WordInfo)) {
                return false;
            }
            final WordInfo other = (WordInfo) o;
            // Compare the words first. When at least one side is null, the words match only
            // if both are null; otherwise defer to the word's own equals().
            final boolean sameWord;
            if (mWord == null || other.mWord == null) {
                sameWord = mWord == other.mWord;
            } else {
                sameWord = mWord.equals(other.mWord);
            }
            // The beginning-of-sentence flag must match as well.
            return sameWord && mIsBeginningOfSentence == other.mIsBeginningOfSentence;
        }
    }
    // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
@ -67,16 +86,9 @@ public class PrevWordsInfo {
    // calling getSuggestions* in this situation.
    public WordInfo[] mPrevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    // Beginning of sentence.
    public PrevWordsInfo() {
        // Start with "no information" in every slot, then mark the immediately preceding
        // position as beginning-of-sentence.
        Arrays.fill(mPrevWordsInfo, WordInfo.EMPTY_WORD_INFO);
        mPrevWordsInfo[0] = WordInfo.BEGINNING_OF_SENTENCE;
    }
    // Construct from the previous word information.
    public PrevWordsInfo(final WordInfo prevWordInfo) {
        mPrevWordsInfo[0] = prevWordInfo;
        Arrays.fill(mPrevWordsInfo, 1 /* start */, mPrevWordsInfo.length, WordInfo.EMPTY_WORD_INFO);
    }
    // Construct from WordInfo array. n-th element represents (n+1)-th previous word's information.
@ -115,6 +127,19 @@ public class PrevWordsInfo {
        }
    }
    @Override
    public int hashCode() {
        // Element-wise hash of the previous-word slots, consistent with equals().
        final int slotsHash = Arrays.hashCode(mPrevWordsInfo);
        return slotsHash;
    }
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o instanceof PrevWordsInfo) {
            // Two instances are equal when every previous-word slot is equal, in order.
            final PrevWordsInfo other = (PrevWordsInfo) o;
            return Arrays.equals(mPrevWordsInfo, other.mPrevWordsInfo);
        }
        return false;
    }
    @Override
    public String toString() {
        final StringBuffer builder = new StringBuffer();
@ -123,7 +148,7 @@ public class PrevWordsInfo {
            builder.append("PrevWord[");
            builder.append(i);
            builder.append("]: ");
-            if (!wordInfo.isValid()) {
+            if (wordInfo == null || !wordInfo.isValid()) {
                builder.append("Empty. ");
                continue;
            }
--- a/java/src/com/android/inputmethod/latin/RichInputConnection.java
+++ b/java/src/com/android/inputmethod/latin/RichInputConnection.java
@ -26,6 +26,7 @@ import android.view.inputmethod.ExtractedText;
 import android.view.inputmethod.ExtractedTextRequest;
 import android.view.inputmethod.InputConnection;
 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 import com.android.inputmethod.latin.utils.CapsModeUtils;
 import com.android.inputmethod.latin.utils.DebugLogUtils;
@ -49,8 +50,10 @@ public final class RichInputConnection {
    private static final boolean DBG = false;
    private static final boolean DEBUG_PREVIOUS_TEXT = false;
    private static final boolean DEBUG_BATCH_NESTING = false;
-    // Provision for a long word pair and a separator
+    // Provision for long words and separators between the words.
-    private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH * 2 + 1;
+    private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH
            * (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */
            + Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */;
    private static final Pattern spaceRegex = Pattern.compile("\\s+");
    private static final int INVALID_CURSOR_POSITION = -1;
@ -544,22 +547,25 @@ public final class RichInputConnection {
        return Arrays.binarySearch(sortedSeparators, code) >= 0;
    }
-    // Get information of the nth word before cursor. n = 1 retrieves the word immediately before
+    // Get context information from nth word before the cursor. n = 1 retrieves the words
-    // the cursor, n = 2 retrieves the word before that, and so on. This splits on whitespace only.
+    // immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
    // on whitespace only.
    // Also, it won't return words that end in a separator (if the nth word before the cursor
    // ends in a separator, it returns information representing beginning-of-sentence).
-    // Example :
+    // Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
-    // (n = 1) "abc def|" -> def
+    // (n = 1) "abc def|" -> abc, def
-    // (n = 1) "abc def |" -> def
+    // (n = 1) "abc def |" -> abc, def
-    // (n = 1) "abc 'def|" -> 'def
+    // (n = 1) "abc 'def|" -> empty, 'def
    // (n = 1) "abc def. |" -> beginning-of-sentence
    // (n = 1) "abc def . |" -> beginning-of-sentence
-    // (n = 2) "abc def|" -> abc
+    // (n = 2) "abc def|" -> beginning-of-sentence, abc
-    // (n = 2) "abc def |" -> abc
+    // (n = 2) "abc def |" -> beginning-of-sentence, abc
    // (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
    // represent this situation using PrevWordsInfo. See TODO in the method.
-    // (n = 2) "abc def. |" -> abc
+    // TODO: The next example's result should be "abc, def". This has to be fixed before we
-    // (n = 2) "abc def . |" -> def
+    // retrieve the prior context of Beginning-of-Sentence.
    // (n = 2) "abc def. |" -> beginning-of-sentence, abc
    // (n = 2) "abc def . |" -> abc, def
    // (n = 2) "abc|" -> beginning-of-sentence
    // (n = 2) "abc |" -> beginning-of-sentence
    // (n = 2) "abc. def|" -> beginning-of-sentence
@ -567,43 +573,50 @@ public final class RichInputConnection {
            final SpacingAndPunctuations spacingAndPunctuations, final int n) {
        if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
        final String[] w = spaceRegex.split(prev);
-
+        final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
-        // Referring to the word after the nth word.
+        for (int i = 0; i < prevWordsInfo.length; i++) {
-        if ((n - 1) > 0 && (n - 1) <= w.length) {
+            final int focusedWordIndex = w.length - n - i;
-            final String wordFollowingTheNthPrevWord = w[w.length - n + 1];
+            // Referring to the word after the focused word.
            if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) {
                final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1];
                if (!wordFollowingTheNthPrevWord.isEmpty()) {
                    final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
                    if (spacingAndPunctuations.isWordConnector(firstChar)) {
-                    // The word following the n-th prev word is starting with a word connector.
+                        // The word following the focused word is starting with a word connector.
                        // TODO: Return meaningful context for this case.
-                    return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
+                        prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
                        break;
                    }
                }
            }
-
+            // If we can't find (n + i) words, the context is beginning-of-sentence.
-        // If we can't find n words, or we found an empty word, the context is
+            if (focusedWordIndex < 0) {
-        // beginning-of-sentence.
+                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
-        if (w.length < n) {
+                break;
            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
            }
-        final String nthPrevWord = w[w.length - n];
+            final String focusedWord = w[focusedWordIndex];
-        final int length = nthPrevWord.length();
+            // If the word is empty, the context is beginning-of-sentence.
            final int length = focusedWord.length();
            if (length <= 0) {
-            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
+                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
                break;
            }
            // If ends in a sentence separator, the context is beginning-of-sentence.
-        final char lastChar = nthPrevWord.charAt(length - 1);
+            final char lastChar = focusedWord.charAt(length - 1);
            if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
-            return PrevWordsInfo.BEGINNING_OF_SENTENCE;
+                prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
                break;
            }
            // If ends in a word separator or connector, the context is unclear.
            // TODO: Return meaningful context for this case.
            if (spacingAndPunctuations.isWordSeparator(lastChar)
                    || spacingAndPunctuations.isWordConnector(lastChar)) {
-            return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
+                prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
                break;
            }
-        return new PrevWordsInfo(new PrevWordsInfo.WordInfo(nthPrevWord));
+            prevWordsInfo[i] = new WordInfo(focusedWord);
        }
        return new PrevWordsInfo(prevWordsInfo);
    }
    /**
--- a/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
+++ b/java/src/com/android/inputmethod/latin/inputlogic/InputLogic.java
@ -1882,10 +1882,11 @@ public final class InputLogic {
        final CharSequence chosenWordWithSuggestions =
                SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord,
                        suggestedWords);
-        // Use the 2nd previous word as the previous word because the 1st previous word is the word
+        // When we are composing a word, get previous words information from the 2nd previous word
-        // to be committed.
+        // because the 1st previous word is the word to be committed. Otherwise get previous words
        // information from the 1st previous word.
        final PrevWordsInfo prevWordsInfo = mConnection.getPrevWordsInfoFromNthPreviousWord(
-                settingsValues.mSpacingAndPunctuations, 2);
+                settingsValues.mSpacingAndPunctuations, mWordComposer.isComposingWord() ? 2 : 1);
        mConnection.commitText(chosenWordWithSuggestions, 1);
        // Add the word to the user history dictionary
        performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWordsInfo);
--- a/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
+++ b/tests/src/com/android/inputmethod/latin/RichInputConnectionAndTextRangeTests.java
@ -30,6 +30,7 @@ import android.view.inputmethod.ExtractedTextRequest;
 import android.view.inputmethod.InputConnection;
 import android.view.inputmethod.InputConnectionWrapper;
 import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
 import com.android.inputmethod.latin.utils.RunInLocale;
 import com.android.inputmethod.latin.utils.StringUtils;
@ -166,6 +167,16 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
                "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
        assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                "abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
        // For n-gram
        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def");
        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                "abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc");
        assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
                "abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1],
                WordInfo.BEGINNING_OF_SENTENCE);
        // The following tests reflect the current behavior of the function
        // RichInputConnection#getNthPreviousWord.
        // TODO: However at this time, the code does never go