[SD7] Actually check for script.

...also implement the check for Hebrew and Arabic.

Bug: 15840116
Change-Id: Ia6433d7d98038ade64c171be4fe4b3f094111fac
This commit is contained in:
Jean Chalard 2014-06-27 22:44:24 +09:00
parent 943e91ffbd
commit 292deb632c
5 changed files with 68 additions and 19 deletions

View file

@ -30,6 +30,7 @@ import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.CapsModeUtils;
import com.android.inputmethod.latin.utils.DebugLogUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.SpannableStringUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.TextRange;
@ -623,9 +624,10 @@ public final class RichInputConnection {
* Returns the text surrounding the cursor.
* @param sortedSeparators a sorted array of code points that split words.
* @param scriptId the script we consider to be writing words, as one of ScriptUtils.SCRIPT_*
* @return a range containing the text surrounding the cursor
public TextRange getWordRangeAtCursor(final int[] sortedSeparators) {
public TextRange getWordRangeAtCursor(final int[] sortedSeparators, final int scriptId) {
mIC = mParent.getCurrentInputConnection();
if (mIC == null) {
return null;
@ -642,7 +644,8 @@ public final class RichInputConnection {
int startIndexInBefore = before.length();
while (startIndexInBefore > 0) {
final int codePoint = Character.codePointBefore(before, startIndexInBefore);
if (isSeparator(codePoint, sortedSeparators)) {
if (isSeparator(codePoint, sortedSeparators)
|| !ScriptUtils.isLetterPartOfScript(codePoint, scriptId)) {
@ -655,7 +658,8 @@ public final class RichInputConnection {
int endIndexInAfter = -1;
while (++endIndexInAfter < after.length()) {
final int codePoint = Character.codePointAt(after, endIndexInAfter);
if (isSeparator(codePoint, sortedSeparators)) {
if (isSeparator(codePoint, sortedSeparators)
|| !ScriptUtils.isLetterPartOfScript(codePoint, scriptId)) {
if (Character.isSupplementaryCodePoint(codePoint)) {

View file

@ -1288,9 +1288,14 @@ public final class InputLogic {
final TextRange range = mConnection.getWordRangeAtCursor(
if (null == range) return; // Happens if we don't have an input connection at all
if (range.length() <= 0) return; // Race condition. No text to resume on, so bail out.
if (range.length() <= 0) {
// Race condition, or touching a word in a non-supported script.
// If for some strange reason (editor bug or so) we measure the text before the cursor as
// longer than what the entire text is supposed to be, the safe thing to do is bail out.
if (range.mHasUrlSpans) return; // If there are links, we don't resume suggestions. Making

View file

@ -152,7 +152,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
// Filter by first letter
final int firstCodePoint = text.codePointAt(0);
// Filter out words that don't start with a letter or an apostrophe
if (!ScriptUtils.isLetterCheckableByScript(firstCodePoint, script)
if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
&& '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
// Filter contents
@ -173,7 +173,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
if (Constants.CODE_PERIOD == codePoint) {
if (ScriptUtils.isLetterCheckableByScript(codePoint, script)) ++letterCount;
if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
// Guesstimate heuristic: perform spell checking if at least 3/4 of the characters
// in this word are letters

View file

@ -68,8 +68,8 @@ public class ScriptUtils {
* Hence at the moment this explicitly tests for Latin, Cyrillic, Greek, Arabic or Hebrew
* characters as appropriate, and explicitly excludes CJK characters.
public static boolean isLetterCheckableByScript(final int codePoint, final int script) {
switch (script) {
public static boolean isLetterPartOfScript(final int codePoint, final int scriptId) {
switch (scriptId) {
// Our supported latin script dictionaries (EFIGS) at the moment only include
// characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
@ -91,11 +91,29 @@ public class ScriptUtils {
return (codePoint >= 0x370 && codePoint <= 0x3FF)
|| (codePoint >= 0x1F00 && codePoint <= 0x1FFF)
|| codePoint == 0xF2;
// Arabic letters can be in any of the following blocks:
// Arabic U+0600..U+06FF
// Arabic Supplement U+0750..U+077F
// Arabic Extended-A U+08A0..U+08FF
// Arabic Presentation Forms-A U+FB50..U+FDFF
// Arabic Presentation Forms-B U+FE70..U+FEFF
return (codePoint >= 0x600 && codePoint <= 0x6FF)
|| (codePoint >= 0x750 && codePoint <= 0x77F)
|| (codePoint >= 0x8A0 && codePoint <= 0x8FF)
|| (codePoint >= 0xFB50 && codePoint <= 0xFDFF)
|| (codePoint >= 0xFE70 && codePoint <= 0xFEFF);
// Hebrew letters are in the Hebrew unicode block, which spans from U+0590 to U+05FF,
// or in the Alphabetic Presentation Forms block, U+FB00..U+FB4F, but only in the
// Hebrew part of that block, which is U+FB1D..U+FB4F.
return (codePoint >= 0x590 && codePoint <= 0x5FF
|| codePoint >= 0xFB1D && codePoint <= 0xFB4F);
return true;
// Should never come here
throw new RuntimeException("Impossible value of script: " + script);
throw new RuntimeException("Impossible value of script: " + scriptId);

View file

@ -33,6 +33,7 @@ import android.view.inputmethod.InputConnectionWrapper;
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.RunInLocale;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.TextRange;
@ -221,6 +222,8 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
private static final int[] SPACE_TAB = StringUtils.toSortedCodePointArray(" \t");
// A character that needs a surrogate pair to represent its code point (U+2008A).
private static final String SUPPLEMENTARY_CHAR = "\uD840\uDC8A";
private static final String HIRAGANA_WORD = "\u3042\u3044\u3046\u3048\u304A"; // あいうえお
private static final String GREEK_WORD = "\u03BA\u03B1\u03B9"; // και
public void testGetWordRangeAtCursor() {
ExtractedText et = new ExtractedText();
@ -233,13 +236,13 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
// basic case
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
assertTrue(TextUtils.equals("word", r.mWord));
// tab character instead of space
mockInputMethodService.setInputConnection(new MockConnection("one\tword\two", "rd", et));
r = ic.getWordRangeAtCursor(TAB);
r = ic.getWordRangeAtCursor(TAB, ScriptUtils.SCRIPT_LATIN);
assertTrue(TextUtils.equals("word", r.mWord));
@ -247,9 +250,28 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
new MockConnection("one word" + SUPPLEMENTARY_CHAR + "wo", "rd", et));
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR));
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
assertTrue(TextUtils.equals("word", r.mWord));
// split on chars outside the specified script
new MockConnection(HIRAGANA_WORD + "wo", "rd" + GREEK_WORD, et));
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
assertTrue(TextUtils.equals("word", r.mWord));
// likewise for greek
new MockConnection("text" + GREEK_WORD, "text", et));
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
assertTrue(TextUtils.equals(GREEK_WORD, r.mWord));
@ -277,7 +299,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
TextRange r;
SuggestionSpan[] suggestions;
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -289,7 +311,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
10 /* start */, 16 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 2);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -302,7 +324,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 16 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -314,7 +336,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
10 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -326,7 +348,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -338,7 +360,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE);
r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 0);