Merge "Find multiple previous word information to support n-gram."
commit
17c5a388d6
|
@ -27,7 +27,8 @@ import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
public class PrevWordsInfo {
|
public class PrevWordsInfo {
|
||||||
public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO =
|
public static final PrevWordsInfo EMPTY_PREV_WORDS_INFO =
|
||||||
new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO);
|
new PrevWordsInfo(WordInfo.EMPTY_WORD_INFO);
|
||||||
public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
|
public static final PrevWordsInfo BEGINNING_OF_SENTENCE =
|
||||||
|
new PrevWordsInfo(WordInfo.BEGINNING_OF_SENTENCE);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Word information used to represent previous words information.
|
* Word information used to represent previous words information.
|
||||||
|
@ -57,6 +58,24 @@ public class PrevWordsInfo {
|
||||||
public boolean isValid() {
|
public boolean isValid() {
|
||||||
return mWord != null;
|
return mWord != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof WordInfo)) return false;
|
||||||
|
final WordInfo wordInfo = (WordInfo)o;
|
||||||
|
if (mWord == null || wordInfo.mWord == null) {
|
||||||
|
return mWord == wordInfo.mWord
|
||||||
|
&& mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
|
||||||
|
}
|
||||||
|
return mWord.equals(wordInfo.mWord)
|
||||||
|
&& mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
|
// The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
|
||||||
|
@ -67,16 +86,9 @@ public class PrevWordsInfo {
|
||||||
// calling getSuggestions* in this situation.
|
// calling getSuggestions* in this situation.
|
||||||
public WordInfo[] mPrevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
public WordInfo[] mPrevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
|
|
||||||
// Beginning of sentence.
|
|
||||||
public PrevWordsInfo() {
|
|
||||||
mPrevWordsInfo[0] = WordInfo.BEGINNING_OF_SENTENCE;
|
|
||||||
Arrays.fill(mPrevWordsInfo, 1 /* start */, mPrevWordsInfo.length, WordInfo.EMPTY_WORD_INFO);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Construct from the previous word information.
|
// Construct from the previous word information.
|
||||||
public PrevWordsInfo(final WordInfo prevWordInfo) {
|
public PrevWordsInfo(final WordInfo prevWordInfo) {
|
||||||
mPrevWordsInfo[0] = prevWordInfo;
|
mPrevWordsInfo[0] = prevWordInfo;
|
||||||
Arrays.fill(mPrevWordsInfo, 1 /* start */, mPrevWordsInfo.length, WordInfo.EMPTY_WORD_INFO);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct from WordInfo array. n-th element represents (n+1)-th previous word's information.
|
// Construct from WordInfo array. n-th element represents (n+1)-th previous word's information.
|
||||||
|
@ -115,6 +127,19 @@ public class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return Arrays.hashCode(mPrevWordsInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (!(o instanceof PrevWordsInfo)) return false;
|
||||||
|
final PrevWordsInfo prevWordsInfo = (PrevWordsInfo)o;
|
||||||
|
return Arrays.equals(mPrevWordsInfo, prevWordsInfo.mPrevWordsInfo);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuffer builder = new StringBuffer();
|
final StringBuffer builder = new StringBuffer();
|
||||||
|
@ -123,7 +148,7 @@ public class PrevWordsInfo {
|
||||||
builder.append("PrevWord[");
|
builder.append("PrevWord[");
|
||||||
builder.append(i);
|
builder.append(i);
|
||||||
builder.append("]: ");
|
builder.append("]: ");
|
||||||
if (!wordInfo.isValid()) {
|
if (wordInfo == null || !wordInfo.isValid()) {
|
||||||
builder.append("Empty. ");
|
builder.append("Empty. ");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import android.view.inputmethod.ExtractedText;
|
||||||
import android.view.inputmethod.ExtractedTextRequest;
|
import android.view.inputmethod.ExtractedTextRequest;
|
||||||
import android.view.inputmethod.InputConnection;
|
import android.view.inputmethod.InputConnection;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
|
||||||
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
||||||
import com.android.inputmethod.latin.utils.CapsModeUtils;
|
import com.android.inputmethod.latin.utils.CapsModeUtils;
|
||||||
import com.android.inputmethod.latin.utils.DebugLogUtils;
|
import com.android.inputmethod.latin.utils.DebugLogUtils;
|
||||||
|
@ -49,8 +50,10 @@ public final class RichInputConnection {
|
||||||
private static final boolean DBG = false;
|
private static final boolean DBG = false;
|
||||||
private static final boolean DEBUG_PREVIOUS_TEXT = false;
|
private static final boolean DEBUG_PREVIOUS_TEXT = false;
|
||||||
private static final boolean DEBUG_BATCH_NESTING = false;
|
private static final boolean DEBUG_BATCH_NESTING = false;
|
||||||
// Provision for a long word pair and a separator
|
// Provision for long words and separators between the words.
|
||||||
private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH * 2 + 1;
|
private static final int LOOKBACK_CHARACTER_NUM = Constants.DICTIONARY_MAX_WORD_LENGTH
|
||||||
|
* (Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1) /* words */
|
||||||
|
+ Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM /* separators */;
|
||||||
private static final Pattern spaceRegex = Pattern.compile("\\s+");
|
private static final Pattern spaceRegex = Pattern.compile("\\s+");
|
||||||
private static final int INVALID_CURSOR_POSITION = -1;
|
private static final int INVALID_CURSOR_POSITION = -1;
|
||||||
|
|
||||||
|
@ -544,22 +547,25 @@ public final class RichInputConnection {
|
||||||
return Arrays.binarySearch(sortedSeparators, code) >= 0;
|
return Arrays.binarySearch(sortedSeparators, code) >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get information of the nth word before cursor. n = 1 retrieves the word immediately before
|
// Get context information from nth word before the cursor. n = 1 retrieves the words
|
||||||
// the cursor, n = 2 retrieves the word before that, and so on. This splits on whitespace only.
|
// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
|
||||||
|
// on whitespace only.
|
||||||
// Also, it won't return words that end in a separator (if the nth word before the cursor
|
// Also, it won't return words that end in a separator (if the nth word before the cursor
|
||||||
// ends in a separator, it returns information representing beginning-of-sentence).
|
// ends in a separator, it returns information representing beginning-of-sentence).
|
||||||
// Example :
|
// Example (when Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM is 2):
|
||||||
// (n = 1) "abc def|" -> def
|
// (n = 1) "abc def|" -> abc, def
|
||||||
// (n = 1) "abc def |" -> def
|
// (n = 1) "abc def |" -> abc, def
|
||||||
// (n = 1) "abc 'def|" -> 'def
|
// (n = 1) "abc 'def|" -> empty, 'def
|
||||||
// (n = 1) "abc def. |" -> beginning-of-sentence
|
// (n = 1) "abc def. |" -> beginning-of-sentence
|
||||||
// (n = 1) "abc def . |" -> beginning-of-sentence
|
// (n = 1) "abc def . |" -> beginning-of-sentence
|
||||||
// (n = 2) "abc def|" -> abc
|
// (n = 2) "abc def|" -> beginning-of-sentence, abc
|
||||||
// (n = 2) "abc def |" -> abc
|
// (n = 2) "abc def |" -> beginning-of-sentence, abc
|
||||||
// (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
|
// (n = 2) "abc 'def|" -> empty. The context is different from "abc def", but we cannot
|
||||||
// represent this situation using PrevWordsInfo. See TODO in the method.
|
// represent this situation using PrevWordsInfo. See TODO in the method.
|
||||||
// (n = 2) "abc def. |" -> abc
|
// TODO: The next example's result should be "abc, def". This has to be fixed before we
|
||||||
// (n = 2) "abc def . |" -> def
|
// retrieve the prior context of Beginning-of-Sentence.
|
||||||
|
// (n = 2) "abc def. |" -> beginning-of-sentence, abc
|
||||||
|
// (n = 2) "abc def . |" -> abc, def
|
||||||
// (n = 2) "abc|" -> beginning-of-sentence
|
// (n = 2) "abc|" -> beginning-of-sentence
|
||||||
// (n = 2) "abc |" -> beginning-of-sentence
|
// (n = 2) "abc |" -> beginning-of-sentence
|
||||||
// (n = 2) "abc. def|" -> beginning-of-sentence
|
// (n = 2) "abc. def|" -> beginning-of-sentence
|
||||||
|
@ -567,43 +573,50 @@ public final class RichInputConnection {
|
||||||
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
|
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
|
||||||
if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
if (prev == null) return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
||||||
final String[] w = spaceRegex.split(prev);
|
final String[] w = spaceRegex.split(prev);
|
||||||
|
final WordInfo[] prevWordsInfo = new WordInfo[Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
// Referring to the word after the nth word.
|
for (int i = 0; i < prevWordsInfo.length; i++) {
|
||||||
if ((n - 1) > 0 && (n - 1) <= w.length) {
|
final int focusedWordIndex = w.length - n - i;
|
||||||
final String wordFollowingTheNthPrevWord = w[w.length - n + 1];
|
// Referring to the word after the focused word.
|
||||||
|
if ((focusedWordIndex + 1) >= 0 && (focusedWordIndex + 1) < w.length) {
|
||||||
|
final String wordFollowingTheNthPrevWord = w[focusedWordIndex + 1];
|
||||||
if (!wordFollowingTheNthPrevWord.isEmpty()) {
|
if (!wordFollowingTheNthPrevWord.isEmpty()) {
|
||||||
final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
|
final char firstChar = wordFollowingTheNthPrevWord.charAt(0);
|
||||||
if (spacingAndPunctuations.isWordConnector(firstChar)) {
|
if (spacingAndPunctuations.isWordConnector(firstChar)) {
|
||||||
// The word following the n-th prev word is starting with a word connector.
|
// The word following the focused word is starting with a word connector.
|
||||||
// TODO: Return meaningful context for this case.
|
// TODO: Return meaningful context for this case.
|
||||||
return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// If we can't find (n + i) words, the context is beginning-of-sentence.
|
||||||
// If we can't find n words, or we found an empty word, the context is
|
if (focusedWordIndex < 0) {
|
||||||
// beginning-of-sentence.
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
|
||||||
if (w.length < n) {
|
break;
|
||||||
return PrevWordsInfo.BEGINNING_OF_SENTENCE;
|
|
||||||
}
|
}
|
||||||
final String nthPrevWord = w[w.length - n];
|
final String focusedWord = w[focusedWordIndex];
|
||||||
final int length = nthPrevWord.length();
|
// If the word is empty, the context is beginning-of-sentence.
|
||||||
|
final int length = focusedWord.length();
|
||||||
if (length <= 0) {
|
if (length <= 0) {
|
||||||
return PrevWordsInfo.BEGINNING_OF_SENTENCE;
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If ends in a sentence separator, the context is beginning-of-sentence.
|
// If ends in a sentence separator, the context is beginning-of-sentence.
|
||||||
final char lastChar = nthPrevWord.charAt(length - 1);
|
final char lastChar = focusedWord.charAt(length - 1);
|
||||||
if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
|
if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
|
||||||
return PrevWordsInfo.BEGINNING_OF_SENTENCE;
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
// If ends in a word separator or connector, the context is unclear.
|
// If ends in a word separator or connector, the context is unclear.
|
||||||
// TODO: Return meaningful context for this case.
|
// TODO: Return meaningful context for this case.
|
||||||
if (spacingAndPunctuations.isWordSeparator(lastChar)
|
if (spacingAndPunctuations.isWordSeparator(lastChar)
|
||||||
|| spacingAndPunctuations.isWordConnector(lastChar)) {
|
|| spacingAndPunctuations.isWordConnector(lastChar)) {
|
||||||
return PrevWordsInfo.EMPTY_PREV_WORDS_INFO;
|
prevWordsInfo[i] = WordInfo.EMPTY_WORD_INFO;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return new PrevWordsInfo(new PrevWordsInfo.WordInfo(nthPrevWord));
|
prevWordsInfo[i] = new WordInfo(focusedWord);
|
||||||
|
}
|
||||||
|
return new PrevWordsInfo(prevWordsInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1882,10 +1882,11 @@ public final class InputLogic {
|
||||||
final CharSequence chosenWordWithSuggestions =
|
final CharSequence chosenWordWithSuggestions =
|
||||||
SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord,
|
SuggestionSpanUtils.getTextWithSuggestionSpan(mLatinIME, chosenWord,
|
||||||
suggestedWords);
|
suggestedWords);
|
||||||
// Use the 2nd previous word as the previous word because the 1st previous word is the word
|
// When we are composing a word, get previous words information from the 2nd previous word
|
||||||
// to be committed.
|
// because the 1st previous word is the word to be committed. Otherwise get previous words
|
||||||
|
// information from the 1st previous word.
|
||||||
final PrevWordsInfo prevWordsInfo = mConnection.getPrevWordsInfoFromNthPreviousWord(
|
final PrevWordsInfo prevWordsInfo = mConnection.getPrevWordsInfoFromNthPreviousWord(
|
||||||
settingsValues.mSpacingAndPunctuations, 2);
|
settingsValues.mSpacingAndPunctuations, mWordComposer.isComposingWord() ? 2 : 1);
|
||||||
mConnection.commitText(chosenWordWithSuggestions, 1);
|
mConnection.commitText(chosenWordWithSuggestions, 1);
|
||||||
// Add the word to the user history dictionary
|
// Add the word to the user history dictionary
|
||||||
performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWordsInfo);
|
performAdditionToUserHistoryDictionary(settingsValues, chosenWord, prevWordsInfo);
|
||||||
|
|
|
@ -30,6 +30,7 @@ import android.view.inputmethod.ExtractedTextRequest;
|
||||||
import android.view.inputmethod.InputConnection;
|
import android.view.inputmethod.InputConnection;
|
||||||
import android.view.inputmethod.InputConnectionWrapper;
|
import android.view.inputmethod.InputConnectionWrapper;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
|
||||||
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
||||||
import com.android.inputmethod.latin.utils.RunInLocale;
|
import com.android.inputmethod.latin.utils.RunInLocale;
|
||||||
import com.android.inputmethod.latin.utils.StringUtils;
|
import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
|
@ -166,6 +167,16 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
|
||||||
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
|
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
|
||||||
assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
|
assertTrue(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
|
||||||
"abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
|
"abc", mSpacingAndPunctuations, 2).mPrevWordsInfo[0].mIsBeginningOfSentence);
|
||||||
|
|
||||||
|
// For n-gram
|
||||||
|
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
|
||||||
|
"abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[0].mWord, "def");
|
||||||
|
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
|
||||||
|
"abc def", mSpacingAndPunctuations, 1).mPrevWordsInfo[1].mWord, "abc");
|
||||||
|
assertEquals(RichInputConnection.getPrevWordsInfoFromNthPreviousWord(
|
||||||
|
"abc def", mSpacingAndPunctuations, 2).mPrevWordsInfo[1],
|
||||||
|
WordInfo.BEGINNING_OF_SENTENCE);
|
||||||
|
|
||||||
// The following tests reflect the current behavior of the function
|
// The following tests reflect the current behavior of the function
|
||||||
// RichInputConnection#getNthPreviousWord.
|
// RichInputConnection#getNthPreviousWord.
|
||||||
// TODO: However at this time, the code never goes
|
// TODO: However at this time, the code never goes
|
||||||
|
|
Loading…
Reference in New Issue