Merge "Start-of-sentence should include newlines and non-period terminators."
commit
edcf5853d1
|
@ -31,6 +31,7 @@ public final class NgramContextUtils {
|
||||||
// Intentional empty constructor for utility class.
|
// Intentional empty constructor for utility class.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final Pattern NEWLINE_REGEX = Pattern.compile("[\\r\\n]+");
|
||||||
private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
|
private static final Pattern SPACE_REGEX = Pattern.compile("\\s+");
|
||||||
// Get context information from nth word before the cursor. n = 1 retrieves the words
|
// Get context information from nth word before the cursor. n = 1 retrieves the words
|
||||||
// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
|
// immediately before the cursor, n = 2 retrieves the words before that, and so on. This splits
|
||||||
|
@ -58,7 +59,11 @@ public final class NgramContextUtils {
|
||||||
public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
|
public static NgramContext getNgramContextFromNthPreviousWord(final CharSequence prev,
|
||||||
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
|
final SpacingAndPunctuations spacingAndPunctuations, final int n) {
|
||||||
if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
|
if (prev == null) return NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||||
final String[] w = SPACE_REGEX.split(prev);
|
final String[] lines = NEWLINE_REGEX.split(prev);
|
||||||
|
if (lines.length == 0) {
|
||||||
|
return new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
|
||||||
|
}
|
||||||
|
final String[] w = SPACE_REGEX.split(lines[lines.length - 1]);
|
||||||
final WordInfo[] prevWordsInfo =
|
final WordInfo[] prevWordsInfo =
|
||||||
new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
new WordInfo[DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
|
Arrays.fill(prevWordsInfo, WordInfo.EMPTY_WORD_INFO);
|
||||||
|
@ -81,16 +86,17 @@ public final class NgramContextUtils {
|
||||||
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
final String focusedWord = w[focusedWordIndex];
|
final String focusedWord = w[focusedWordIndex];
|
||||||
// If the word is, the context is beginning-of-sentence.
|
// If the word is empty, the context is beginning-of-sentence.
|
||||||
final int length = focusedWord.length();
|
final int length = focusedWord.length();
|
||||||
if (length <= 0) {
|
if (length <= 0) {
|
||||||
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// If ends in a sentence separator, the context is beginning-of-sentence.
|
// If the word ends in a sentence terminator, the context is beginning-of-sentence.
|
||||||
final char lastChar = focusedWord.charAt(length - 1);
|
final char lastChar = focusedWord.charAt(length - 1);
|
||||||
if (spacingAndPunctuations.isSentenceSeparator(lastChar)) {
|
if (spacingAndPunctuations.isSentenceTerminator(lastChar)) {
|
||||||
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
prevWordsInfo[i] = WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
package com.android.inputmethod.latin;
|
package com.android.inputmethod.latin;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||||
|
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
|
||||||
|
import com.android.inputmethod.latin.utils.NgramContextUtils;
|
||||||
|
|
||||||
import android.test.AndroidTestCase;
|
import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.SmallTest;
|
import android.test.suitebuilder.annotation.SmallTest;
|
||||||
|
@ -120,4 +122,24 @@ public class NgramContextTests extends AndroidTestCase {
|
||||||
assertEquals(1, ngramContext_a_empty.extractPrevWordsContextArray().length);
|
assertEquals(1, ngramContext_a_empty.extractPrevWordsContextArray().length);
|
||||||
assertEquals("a", ngramContext_a_empty.extractPrevWordsContextArray()[0]);
|
assertEquals("a", ngramContext_a_empty.extractPrevWordsContextArray()[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGetNgramContextFromNthPreviousWord() {
|
||||||
|
SpacingAndPunctuations spacingAndPunctuations = new SpacingAndPunctuations(
|
||||||
|
mContext.getResources());
|
||||||
|
assertEquals("<S>", NgramContextUtils.getNgramContextFromNthPreviousWord("",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a. b ",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a? b ",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a! b ",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertEquals("<S> b", NgramContextUtils.getNgramContextFromNthPreviousWord("a\nb ",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertEquals("<S> a b", NgramContextUtils.getNgramContextFromNthPreviousWord("a b ",
|
||||||
|
spacingAndPunctuations, 1).extractPrevWordsContext());
|
||||||
|
assertFalse(NgramContextUtils
|
||||||
|
.getNgramContextFromNthPreviousWord("a b c d e", spacingAndPunctuations, 1)
|
||||||
|
.extractPrevWordsContext().startsWith("<S>"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue