[SD7] Actually check for script.

...also implement the check for Hebrew and Arabic.

Bug: 15840116
Change-Id: Ia6433d7d98038ade64c171be4fe4b3f094111fac
This commit is contained in:
Jean Chalard 2014-06-27 22:44:24 +09:00
parent 943e91ffbd
commit 292deb632c
5 changed files with 68 additions and 19 deletions

View file

@ -30,6 +30,7 @@ import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.CapsModeUtils; import com.android.inputmethod.latin.utils.CapsModeUtils;
import com.android.inputmethod.latin.utils.DebugLogUtils; import com.android.inputmethod.latin.utils.DebugLogUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.SpannableStringUtils; import com.android.inputmethod.latin.utils.SpannableStringUtils;
import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.TextRange; import com.android.inputmethod.latin.utils.TextRange;
@ -623,9 +624,10 @@ public final class RichInputConnection {
* Returns the text surrounding the cursor. * Returns the text surrounding the cursor.
* *
* @param sortedSeparators a sorted array of code points that split words. * @param sortedSeparators a sorted array of code points that split words.
* @param scriptId the script that the characters of a word must belong to, as one of ScriptUtils.SCRIPT_*; a code point outside this script ends the word
* @return a range containing the text surrounding the cursor * @return a range containing the text surrounding the cursor
*/ */
public TextRange getWordRangeAtCursor(final int[] sortedSeparators) { public TextRange getWordRangeAtCursor(final int[] sortedSeparators, final int scriptId) {
mIC = mParent.getCurrentInputConnection(); mIC = mParent.getCurrentInputConnection();
if (mIC == null) { if (mIC == null) {
return null; return null;
@ -642,7 +644,8 @@ public final class RichInputConnection {
int startIndexInBefore = before.length(); int startIndexInBefore = before.length();
while (startIndexInBefore > 0) { while (startIndexInBefore > 0) {
final int codePoint = Character.codePointBefore(before, startIndexInBefore); final int codePoint = Character.codePointBefore(before, startIndexInBefore);
if (isSeparator(codePoint, sortedSeparators)) { if (isSeparator(codePoint, sortedSeparators)
|| !ScriptUtils.isLetterPartOfScript(codePoint, scriptId)) {
break; break;
} }
--startIndexInBefore; --startIndexInBefore;
@ -655,7 +658,8 @@ public final class RichInputConnection {
int endIndexInAfter = -1; int endIndexInAfter = -1;
while (++endIndexInAfter < after.length()) { while (++endIndexInAfter < after.length()) {
final int codePoint = Character.codePointAt(after, endIndexInAfter); final int codePoint = Character.codePointAt(after, endIndexInAfter);
if (isSeparator(codePoint, sortedSeparators)) { if (isSeparator(codePoint, sortedSeparators)
|| !ScriptUtils.isLetterPartOfScript(codePoint, scriptId)) {
break; break;
} }
if (Character.isSupplementaryCodePoint(codePoint)) { if (Character.isSupplementaryCodePoint(codePoint)) {

View file

@ -1288,9 +1288,14 @@ public final class InputLogic {
return; return;
} }
final TextRange range = mConnection.getWordRangeAtCursor( final TextRange range = mConnection.getWordRangeAtCursor(
settingsValues.mSpacingAndPunctuations.mSortedWordSeparators); settingsValues.mSpacingAndPunctuations.mSortedWordSeparators,
currentKeyboardScriptId);
if (null == range) return; // Happens if we don't have an input connection at all if (null == range) return; // Happens if we don't have an input connection at all
if (range.length() <= 0) return; // Race condition. No text to resume on, so bail out. if (range.length() <= 0) {
// Race condition, or touching a word in a non-supported script.
mLatinIME.setNeutralSuggestionStrip();
return;
}
// If for some strange reason (editor bug or so) we measure the text before the cursor as // If for some strange reason (editor bug or so) we measure the text before the cursor as
// longer than what the entire text is supposed to be, the safe thing to do is bail out. // longer than what the entire text is supposed to be, the safe thing to do is bail out.
if (range.mHasUrlSpans) return; // If there are links, we don't resume suggestions. Making if (range.mHasUrlSpans) return; // If there are links, we don't resume suggestions. Making

View file

@ -152,7 +152,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
// Filter by first letter // Filter by first letter
final int firstCodePoint = text.codePointAt(0); final int firstCodePoint = text.codePointAt(0);
// Filter out words that don't start with a letter or an apostrophe // Filter out words that don't start with a letter or an apostrophe
if (!ScriptUtils.isLetterCheckableByScript(firstCodePoint, script) if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
&& '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE; && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
// Filter contents // Filter contents
@ -173,7 +173,7 @@ public abstract class AndroidWordLevelSpellCheckerSession extends Session {
if (Constants.CODE_PERIOD == codePoint) { if (Constants.CODE_PERIOD == codePoint) {
return CHECKABILITY_CONTAINS_PERIOD; return CHECKABILITY_CONTAINS_PERIOD;
} }
if (ScriptUtils.isLetterCheckableByScript(codePoint, script)) ++letterCount; if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
} }
// Guestimate heuristic: perform spell checking if at least 3/4 of the characters // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
// in this word are letters // in this word are letters

View file

@ -68,8 +68,8 @@ public class ScriptUtils {
* Hence at the moment this explicitly tests for Cyrillic characters or Latin characters * Hence at the moment this explicitly tests for characters of the requested script
* as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. * (including Arabic and Hebrew) as appropriate, and explicitly excludes CJK characters.
*/ */
public static boolean isLetterCheckableByScript(final int codePoint, final int script) { public static boolean isLetterPartOfScript(final int codePoint, final int scriptId) {
switch (script) { switch (scriptId) {
case SCRIPT_LATIN: case SCRIPT_LATIN:
// Our supported latin script dictionaries (EFIGS) at the moment only include // Our supported latin script dictionaries (EFIGS) at the moment only include
// characters in the C0, C1, Latin Extended A and B, IPA extensions unicode // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
@ -91,11 +91,29 @@ public class ScriptUtils {
return (codePoint >= 0x370 && codePoint <= 0x3FF) return (codePoint >= 0x370 && codePoint <= 0x3FF)
|| (codePoint >= 0x1F00 && codePoint <= 0x1FFF) || (codePoint >= 0x1F00 && codePoint <= 0x1FFF)
|| codePoint == 0xF2; || codePoint == 0xF2;
case SCRIPT_ARABIC:
// Arabic letters can be in any of the following blocks:
// Arabic U+0600..U+06FF
// Arabic Supplement U+0750..U+077F
// Arabic Extended-A U+08A0..U+08FF
// Arabic Presentation Forms-A U+FB50..U+FDFF
// Arabic Presentation Forms-B U+FE70..U+FEFF
return (codePoint >= 0x600 && codePoint <= 0x6FF)
|| (codePoint >= 0x750 && codePoint <= 0x77F)
|| (codePoint >= 0x8A0 && codePoint <= 0x8FF)
|| (codePoint >= 0xFB50 && codePoint <= 0xFDFF)
|| (codePoint >= 0xFE70 && codePoint <= 0xFEFF);
case SCRIPT_HEBREW:
// Hebrew letters are in the Hebrew unicode block, which spans from U+0590 to U+05FF,
// or in the Alphabetic Presentation Forms block, U+FB00..U+FB4F, but only in the
// Hebrew part of that block, which is U+FB1D..U+FB4F.
return (codePoint >= 0x590 && codePoint <= 0x5FF
|| codePoint >= 0xFB1D && codePoint <= 0xFB4F);
case SCRIPT_UNKNOWN: case SCRIPT_UNKNOWN:
return true; return true;
default: default:
// Should never come here // Should never come here
throw new RuntimeException("Impossible value of script: " + script); throw new RuntimeException("Impossible value of script: " + scriptId);
} }
} }

View file

@ -33,6 +33,7 @@ import android.view.inputmethod.InputConnectionWrapper;
import com.android.inputmethod.latin.PrevWordsInfo.WordInfo; import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
import com.android.inputmethod.latin.settings.SpacingAndPunctuations; import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
import com.android.inputmethod.latin.utils.RunInLocale; import com.android.inputmethod.latin.utils.RunInLocale;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.TextRange; import com.android.inputmethod.latin.utils.TextRange;
@ -221,6 +222,8 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
private static final int[] SPACE_TAB = StringUtils.toSortedCodePointArray(" \t"); private static final int[] SPACE_TAB = StringUtils.toSortedCodePointArray(" \t");
// A character that needs surrogate pair to represent its code point (U+2008A). // A character that needs surrogate pair to represent its code point (U+2008A).
private static final String SUPPLEMENTARY_CHAR = "\uD840\uDC8A"; private static final String SUPPLEMENTARY_CHAR = "\uD840\uDC8A";
private static final String HIRAGANA_WORD = "\u3042\u3044\u3046\u3048\u304A"; // あいうえお
private static final String GREEK_WORD = "\u03BA\u03B1\u03B9"; // και
public void testGetWordRangeAtCursor() { public void testGetWordRangeAtCursor() {
ExtractedText et = new ExtractedText(); ExtractedText et = new ExtractedText();
@ -233,13 +236,13 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
ic.beginBatchEdit(); ic.beginBatchEdit();
// basic case // basic case
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
assertTrue(TextUtils.equals("word", r.mWord)); assertTrue(TextUtils.equals("word", r.mWord));
// tab character instead of space // tab character instead of space
mockInputMethodService.setInputConnection(new MockConnection("one\tword\two", "rd", et)); mockInputMethodService.setInputConnection(new MockConnection("one\tword\two", "rd", et));
ic.beginBatchEdit(); ic.beginBatchEdit();
r = ic.getWordRangeAtCursor(TAB); r = ic.getWordRangeAtCursor(TAB, ScriptUtils.SCRIPT_LATIN);
ic.endBatchEdit(); ic.endBatchEdit();
assertTrue(TextUtils.equals("word", r.mWord)); assertTrue(TextUtils.equals("word", r.mWord));
@ -247,9 +250,28 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
mockInputMethodService.setInputConnection( mockInputMethodService.setInputConnection(
new MockConnection("one word" + SUPPLEMENTARY_CHAR + "wo", "rd", et)); new MockConnection("one word" + SUPPLEMENTARY_CHAR + "wo", "rd", et));
ic.beginBatchEdit(); ic.beginBatchEdit();
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR)); r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
ScriptUtils.SCRIPT_LATIN);
ic.endBatchEdit(); ic.endBatchEdit();
assertTrue(TextUtils.equals("word", r.mWord)); assertTrue(TextUtils.equals("word", r.mWord));
// split on chars outside the specified script
mockInputMethodService.setInputConnection(
new MockConnection(HIRAGANA_WORD + "wo", "rd" + GREEK_WORD, et));
ic.beginBatchEdit();
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
ScriptUtils.SCRIPT_LATIN);
ic.endBatchEdit();
assertTrue(TextUtils.equals("word", r.mWord));
// likewise for greek
mockInputMethodService.setInputConnection(
new MockConnection("text" + GREEK_WORD, "text", et));
ic.beginBatchEdit();
r = ic.getWordRangeAtCursor(StringUtils.toSortedCodePointArray(SUPPLEMENTARY_CHAR),
ScriptUtils.SCRIPT_GREEK);
ic.endBatchEdit();
assertTrue(TextUtils.equals(GREEK_WORD, r.mWord));
} }
/** /**
@ -277,7 +299,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
TextRange r; TextRange r;
SuggestionSpan[] suggestions; SuggestionSpan[] suggestions;
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1); assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1); MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -289,7 +311,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */), text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
10 /* start */, 16 /* end */, 0 /* flags */); 10 /* start */, 16 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos)); mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 2); assertEquals(suggestions.length, 2);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1); MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -302,7 +324,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */), text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 16 /* end */, 0 /* flags */); 5 /* start */, 16 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos)); mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1); assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1); MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -314,7 +336,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */), text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
10 /* start */, 20 /* end */, 0 /* flags */); 10 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos)); mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1); assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1); MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -326,7 +348,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */), text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 20 /* end */, 0 /* flags */); 5 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos)); mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 1); assertEquals(suggestions.length, 1);
MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1); MoreAsserts.assertEquals(suggestions[0].getSuggestions(), SUGGESTIONS1);
@ -338,7 +360,7 @@ public class RichInputConnectionAndTextRangeTests extends AndroidTestCase {
text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */), text.setSpan(new SuggestionSpan(Locale.ENGLISH, SUGGESTIONS2, 0 /* flags */),
5 /* start */, 20 /* end */, 0 /* flags */); 5 /* start */, 20 /* end */, 0 /* flags */);
mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos)); mockInputMethodService.setInputConnection(new MockConnection(text, cursorPos));
r = ic.getWordRangeAtCursor(SPACE); r = ic.getWordRangeAtCursor(SPACE, ScriptUtils.SCRIPT_LATIN);
suggestions = r.getSuggestionSpansAtWord(); suggestions = r.getSuggestionSpansAtWord();
assertEquals(suggestions.length, 0); assertEquals(suggestions.length, 0);
} }