From 84ed0966417d93b07c4da2b295244b160d223ce9 Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Tue, 10 Jul 2012 15:33:18 +0900 Subject: [PATCH] Separate SpellCheckerSession from SpellCheckerService Bug: 6789576 Change-Id: I7c55d36afad7ef6046353b3c9e849a54a6dc83ae --- .../AndroidSpellCheckerService.java | 409 +---------------- .../AndroidSpellCheckerSession.java | 420 ++++++++++++++++++ 2 files changed, 436 insertions(+), 393 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java index 5332c066a..42b05592c 100644 --- a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java @@ -20,14 +20,9 @@ import android.content.Intent; import android.content.SharedPreferences; import android.preference.PreferenceManager; import android.service.textservice.SpellCheckerService; -import android.text.TextUtils; import android.util.Log; -import android.util.LruCache; -import android.view.textservice.SentenceSuggestionsInfo; import android.view.textservice.SuggestionsInfo; -import android.view.textservice.TextInfo; -import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.ContactsBinaryDictionary; @@ -37,12 +32,10 @@ import com.android.inputmethod.latin.DictionaryFactory; import com.android.inputmethod.latin.LocaleUtils; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.StringUtils; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary; import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary; import com.android.inputmethod.latin.UserBinaryDictionary; import com.android.inputmethod.latin.WhitelistDictionary; -import com.android.inputmethod.latin.WordComposer; import java.lang.ref.WeakReference; import java.util.ArrayList; @@ -65,9 +58,9 @@ public class AndroidSpellCheckerService extends SpellCheckerService public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; - private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case - private static final int CAPITALIZE_FIRST = 1; // First only - private static final int CAPITALIZE_ALL = 2; // All caps + public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case + public static final int CAPITALIZE_FIRST = 1; // First only + public static final int CAPITALIZE_ALL = 2; // All caps private final static String[] EMPTY_STRING_ARRAY = new String[0]; private Map mDictionaryPools = @@ -91,8 +84,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService public static final int SCRIPT_LATIN = 0; public static final int SCRIPT_CYRILLIC = 1; - private static final String SINGLE_QUOTE = "\u0027"; - private static final String APOSTROPHE = "\u2019"; + public static final String SINGLE_QUOTE = "\u0027"; + public static final String APOSTROPHE = "\u2019"; private static final TreeMap mLanguageToScript; static { // List of the supported languages and their associated script. We won't check @@ -129,7 +122,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); } - private static int getScriptFromLocale(final Locale locale) { + public static int getScriptFromLocale(final Locale locale) { final Integer script = mLanguageToScript.get(locale.getLanguage()); if (null == script) { throw new RuntimeException("We have been called with an unsupported language: \"" @@ -194,17 +187,22 @@ public class AndroidSpellCheckerService extends SpellCheckerService return new AndroidSpellCheckerSession(this); } - private static SuggestionsInfo getNotInDictEmptySuggestions() { + public static SuggestionsInfo getNotInDictEmptySuggestions() { return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); } - private static SuggestionsInfo getInDictEmptySuggestions() { + public static SuggestionsInfo getInDictEmptySuggestions() { return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, EMPTY_STRING_ARRAY); } + public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) { + return new SuggestionsGatherer( + text, mSuggestionThreshold, mRecommendedThreshold, maxLength); + } + // TODO: remove this class and replace it by storage local to the session. - private static class SuggestionsGatherer { + public static class SuggestionsGatherer { public static class Result { public final String[] mSuggestions; public final boolean mHasRecommendedSuggestions; @@ -396,7 +394,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService }.start(); } - private DictionaryPool getDictionaryPool(final String locale) { + public DictionaryPool getDictionaryPool(final String locale) { DictionaryPool pool = mDictionaryPools.get(locale); if (null == pool) { final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); @@ -447,7 +445,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService } // This method assumes the text is not empty or null. - private static int getCapitalizationType(String text) { + public static int getCapitalizationType(String text) { // If the first char is not uppercase, then the word is either all lower case, // and in either case we return CAPITALIZE_NONE. if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; @@ -464,379 +462,4 @@ public class AndroidSpellCheckerService extends SpellCheckerService if (1 == capsCount) return CAPITALIZE_FIRST; return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); } - - private static class AndroidSpellCheckerSession extends Session { - // Immutable, but need the locale which is not available in the constructor yet - private DictionaryPool mDictionaryPool; - // Likewise - private Locale mLocale; - // Cache this for performance - private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. - - private final AndroidSpellCheckerService mService; - - private final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); - - private static class SuggestionsParams { - public final String[] mSuggestions; - public final int mFlags; - public SuggestionsParams(String[] suggestions, int flags) { - mSuggestions = suggestions; - mFlags = flags; - } - } - - private static class SuggestionsCache { - private static final char CHAR_DELIMITER = '\uFFFC'; - private static final int MAX_CACHE_SIZE = 50; - private final LruCache mUnigramSuggestionsInfoCache = - new LruCache(MAX_CACHE_SIZE); - - // TODO: Support n-gram input - private static String generateKey(String query, String prevWord) { - if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { - return query; - } - return query + CHAR_DELIMITER + prevWord; - } - - // TODO: Support n-gram input - public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { - return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); - } - - // TODO: Support n-gram input - public void putSuggestionsToCache( - String query, String prevWord, String[] suggestions, int flags) { - if (suggestions == null || TextUtils.isEmpty(query)) { - return; - } - mUnigramSuggestionsInfoCache.put( - generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); - } - } - - AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { - mService = service; - } - - @Override - public void onCreate() { - final String localeString = getLocale(); - mDictionaryPool = mService.getDictionaryPool(localeString); - mLocale = LocaleUtils.constructLocaleFromString(localeString); - mScript = getScriptFromLocale(mLocale); - } - - /* - * Returns whether the code point is a letter that makes sense for the specified - * locale for this spell checker. - * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml - * and is limited to EFIGS languages and Russian. - * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters - * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. - */ - private static boolean isLetterCheckableByLanguage(final int codePoint, - final int script) { - switch (script) { - case SCRIPT_LATIN: - // Our supported latin script dictionaries (EFIGS) at the moment only include - // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode - // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, - // so the below is a very efficient way to test for it. As for the 0-0x3F, it's - // excluded from isLetter anyway. - return codePoint <= 0x2AF && Character.isLetter(codePoint); - case SCRIPT_CYRILLIC: - // All Cyrillic characters are in the 400~52F block. There are some in the upper - // Unicode range, but they are archaic characters that are not used in modern - // russian and are not used by our dictionary. - return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); - default: - // Should never come here - throw new RuntimeException("Impossible value of script: " + script); - } - } - - /** - * Finds out whether a particular string should be filtered out of spell checking. - * - * This will loosely match URLs, numbers, symbols. To avoid always underlining words that - * we know we will never recognize, this accepts a script identifier that should be one - * of the SCRIPT_* constants defined above, to rule out quickly characters from very - * different languages. - * - * @param text the string to evaluate. - * @param script the identifier for the script this spell checker recognizes - * @return true if we should filter this text out, false otherwise - */ - private static boolean shouldFilterOut(final String text, final int script) { - if (TextUtils.isEmpty(text) || text.length() <= 1) return true; - - // TODO: check if an equivalent processing can't be done more quickly with a - // compiled regexp. - // Filter by first letter - final int firstCodePoint = text.codePointAt(0); - // Filter out words that don't start with a letter or an apostrophe - if (!isLetterCheckableByLanguage(firstCodePoint, script) - && '\'' != firstCodePoint) return true; - - // Filter contents - final int length = text.length(); - int letterCount = 0; - for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { - final int codePoint = text.codePointAt(i); - // Any word containing a '@' is probably an e-mail address - // Any word containing a '/' is probably either an ad-hoc combination of two - // words or a URI - in either case we don't want to spell check that - if ('@' == codePoint || '/' == codePoint) return true; - if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; - } - // Guestimate heuristic: perform spell checking if at least 3/4 of the characters - // in this word are letters - return (letterCount * 4 < length * 3); - } - - private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote( - TextInfo ti, SentenceSuggestionsInfo ssi) { - final String typedText = ti.getText(); - if (!typedText.contains(SINGLE_QUOTE)) { - return null; - } - final int N = ssi.getSuggestionsCount(); - final ArrayList additionalOffsets = new ArrayList(); - final ArrayList additionalLengths = new ArrayList(); - final ArrayList additionalSuggestionsInfos = - new ArrayList(); - String currentWord = null; - for (int i = 0; i < N; ++i) { - final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i); - final int flags = si.getSuggestionsAttributes(); - if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) { - continue; - } - final int offset = ssi.getOffsetAt(i); - final int length = ssi.getLengthAt(i); - final String subText = typedText.substring(offset, offset + length); - final String prevWord = currentWord; - currentWord = subText; - if (!subText.contains(SINGLE_QUOTE)) { - continue; - } - final String[] splitTexts = subText.split(SINGLE_QUOTE, -1); - if (splitTexts == null || splitTexts.length <= 1) { - continue; - } - final int splitNum = splitTexts.length; - for (int j = 0; j < splitNum; ++j) { - final String splitText = splitTexts[j]; - if (TextUtils.isEmpty(splitText)) { - continue; - } - if (mSuggestionsCache.getSuggestionsFromCache( - splitText, prevWord) == null) { - continue; - } - final int newLength = splitText.length(); - // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO - final int newFlags = 0; - final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY); - newSi.setCookieAndSequence(si.getCookie(), si.getSequence()); - if (DBG) { - Log.d(TAG, "Override and remove old span over: " - + splitText + ", " + offset + "," + newLength); - } - additionalOffsets.add(offset); - additionalLengths.add(newLength); - additionalSuggestionsInfos.add(newSi); - } - } - final int additionalSize = additionalOffsets.size(); - if (additionalSize <= 0) { - return null; - } - final int suggestionsSize = N + additionalSize; - final int[] newOffsets = new int[suggestionsSize]; - final int[] newLengths = new int[suggestionsSize]; - final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize]; - int i; - for (i = 0; i < N; ++i) { - newOffsets[i] = ssi.getOffsetAt(i); - newLengths[i] = ssi.getLengthAt(i); - newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i); - } - for (; i < suggestionsSize; ++i) { - newOffsets[i] = additionalOffsets.get(i - N); - newLengths[i] = additionalLengths.get(i - N); - newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N); - } - return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths); - } - - @Override - public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple( - TextInfo[] textInfos, int suggestionsLimit) { - final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple( - textInfos, suggestionsLimit); - if (retval == null || retval.length != textInfos.length) { - return retval; - } - for (int i = 0; i < retval.length; ++i) { - final SentenceSuggestionsInfo tempSsi = - fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]); - if (tempSsi != null) { - retval[i] = tempSsi; - } - } - return retval; - } - - @Override - public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, - int suggestionsLimit, boolean sequentialWords) { - final int length = textInfos.length; - final SuggestionsInfo[] retval = new SuggestionsInfo[length]; - for (int i = 0; i < length; ++i) { - final String prevWord; - if (sequentialWords && i > 0) { - final String prevWordCandidate = textInfos[i - 1].getText(); - // Note that an empty string would be used to indicate the initial word - // in the future. - prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate; - } else { - prevWord = null; - } - retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit); - retval[i].setCookieAndSequence( - textInfos[i].getCookie(), textInfos[i].getSequence()); - } - return retval; - } - - // Note : this must be reentrant - /** - * Gets a list of suggestions for a specific string. This returns a list of possible - * corrections for the text passed as an argument. It may split or group words, and - * even perform grammatical analysis. - */ - @Override - public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, - final int suggestionsLimit) { - return onGetSuggestions(textInfo, null, suggestionsLimit); - } - - private SuggestionsInfo onGetSuggestions( - final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { - try { - final String inText = textInfo.getText(); - final SuggestionsParams cachedSuggestionsParams = - mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); - if (cachedSuggestionsParams != null) { - if (DBG) { - Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); - } - return new SuggestionsInfo( - cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); - } - - if (shouldFilterOut(inText, mScript)) { - DictAndProximity dictInfo = null; - try { - dictInfo = mDictionaryPool.takeOrGetNull(); - if (null == dictInfo) return getNotInDictEmptySuggestions(); - return dictInfo.mDictionary.isValidWord(inText) ? - getInDictEmptySuggestions() : getNotInDictEmptySuggestions(); - } finally { - if (null != dictInfo) { - if (!mDictionaryPool.offer(dictInfo)) { - Log.e(TAG, "Can't re-insert a dictionary into its pool"); - } - } - } - } - final String text = inText.replaceAll(APOSTROPHE, SINGLE_QUOTE); - - // TODO: Don't gather suggestions if the limit is <= 0 unless necessary - final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, - mService.mSuggestionThreshold, mService.mRecommendedThreshold, - suggestionsLimit); - final WordComposer composer = new WordComposer(); - final int length = text.length(); - for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { - final int codePoint = text.codePointAt(i); - // The getXYForCodePointAndScript method returns (Y << 16) + X - final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( - codePoint, mScript); - if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { - composer.add(codePoint, WordComposer.NOT_A_COORDINATE, - WordComposer.NOT_A_COORDINATE); - } else { - composer.add(codePoint, xy & 0xFFFF, xy >> 16); - } - } - - final int capitalizeType = getCapitalizationType(text); - boolean isInDict = true; - DictAndProximity dictInfo = null; - try { - dictInfo = mDictionaryPool.takeOrGetNull(); - if (null == dictInfo) return getNotInDictEmptySuggestions(); - final ArrayList suggestions = dictInfo.mDictionary.getWords( - composer, prevWord, dictInfo.mProximityInfo); - for (final SuggestedWordInfo suggestion : suggestions) { - final String suggestionStr = suggestion.mWord.toString(); - suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, - suggestionStr.length(), suggestion.mScore); - } - isInDict = dictInfo.mDictionary.isValidWord(text); - if (!isInDict && CAPITALIZE_NONE != capitalizeType) { - // We want to test the word again if it's all caps or first caps only. - // If it's fully down, we already tested it, if it's mixed case, we don't - // want to test a lowercase version of it. - isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); - } - } finally { - if (null != dictInfo) { - if (!mDictionaryPool.offer(dictInfo)) { - Log.e(TAG, "Can't re-insert a dictionary into its pool"); - } - } - } - - final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( - capitalizeType, mLocale); - - if (DBG) { - Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " - + suggestionsLimit); - Log.i(TAG, "IsInDict = " + isInDict); - Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); - Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); - if (null != result.mSuggestions) { - for (String suggestion : result.mSuggestions) { - Log.i(TAG, suggestion); - } - } - } - - final int flags = - (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY - : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) - | (result.mHasRecommendedSuggestions - ? SuggestionsInfoCompatUtils - .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() - : 0); - final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); - mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); - return retval; - } catch (RuntimeException e) { - // Don't kill the keyboard if there is a bug in the spell checker - if (DBG) { - throw e; - } else { - Log.e(TAG, "Exception while spellcheking: " + e); - return getNotInDictEmptySuggestions(); - } - } - } - } } diff --git a/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java new file mode 100644 index 000000000..e0f340879 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java @@ -0,0 +1,420 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin.spellcheck; + + +import android.service.textservice.SpellCheckerService.Session; +import android.text.TextUtils; +import android.util.Log; +import android.util.LruCache; +import android.view.textservice.SentenceSuggestionsInfo; +import android.view.textservice.SuggestionsInfo; +import android.view.textservice.TextInfo; + +import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; +import com.android.inputmethod.latin.LocaleUtils; +import com.android.inputmethod.latin.WordComposer; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; + +import java.util.ArrayList; +import java.util.Locale; + +public class AndroidSpellCheckerSession extends Session { + private static final String TAG = AndroidSpellCheckerSession.class.getSimpleName(); + private static final boolean DBG = false; + private final static String[] EMPTY_STRING_ARRAY = new String[0]; + + // Immutable, but need the locale which is not available in the constructor yet + private DictionaryPool mDictionaryPool; + // Likewise + private Locale mLocale; + // Cache this for performance + private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. + private final AndroidSpellCheckerService mService; + private final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); + + private static class SuggestionsParams { + public final String[] mSuggestions; + public final int mFlags; + public SuggestionsParams(String[] suggestions, int flags) { + mSuggestions = suggestions; + mFlags = flags; + } + } + + private static class SuggestionsCache { + private static final char CHAR_DELIMITER = '\uFFFC'; + private static final int MAX_CACHE_SIZE = 50; + private final LruCache mUnigramSuggestionsInfoCache = + new LruCache(MAX_CACHE_SIZE); + + // TODO: Support n-gram input + private static String generateKey(String query, String prevWord) { + if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { + return query; + } + return query + CHAR_DELIMITER + prevWord; + } + + // TODO: Support n-gram input + public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { + return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); + } + + // TODO: Support n-gram input + public void putSuggestionsToCache( + String query, String prevWord, String[] suggestions, int flags) { + if (suggestions == null || TextUtils.isEmpty(query)) { + return; + } + mUnigramSuggestionsInfoCache.put( + generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); + } + } + + AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { + mService = service; + } + + @Override + public void onCreate() { + final String localeString = getLocale(); + mDictionaryPool = mService.getDictionaryPool(localeString); + mLocale = LocaleUtils.constructLocaleFromString(localeString); + mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); + } + + /* + * Returns whether the code point is a letter that makes sense for the specified + * locale for this spell checker. + * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml + * and is limited to EFIGS languages and Russian. + * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters + * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. + */ + private static boolean isLetterCheckableByLanguage(final int codePoint, + final int script) { + switch (script) { + case AndroidSpellCheckerService.SCRIPT_LATIN: + // Our supported latin script dictionaries (EFIGS) at the moment only include + // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode + // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, + // so the below is a very efficient way to test for it. As for the 0-0x3F, it's + // excluded from isLetter anyway. + return codePoint <= 0x2AF && Character.isLetter(codePoint); + case AndroidSpellCheckerService.SCRIPT_CYRILLIC: + // All Cyrillic characters are in the 400~52F block. There are some in the upper + // Unicode range, but they are archaic characters that are not used in modern + // russian and are not used by our dictionary. + return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); + default: + // Should never come here + throw new RuntimeException("Impossible value of script: " + script); + } + } + + /** + * Finds out whether a particular string should be filtered out of spell checking. + * + * This will loosely match URLs, numbers, symbols. To avoid always underlining words that + * we know we will never recognize, this accepts a script identifier that should be one + * of the SCRIPT_* constants defined above, to rule out quickly characters from very + * different languages. + * + * @param text the string to evaluate. + * @param script the identifier for the script this spell checker recognizes + * @return true if we should filter this text out, false otherwise + */ + private static boolean shouldFilterOut(final String text, final int script) { + if (TextUtils.isEmpty(text) || text.length() <= 1) return true; + + // TODO: check if an equivalent processing can't be done more quickly with a + // compiled regexp. + // Filter by first letter + final int firstCodePoint = text.codePointAt(0); + // Filter out words that don't start with a letter or an apostrophe + if (!isLetterCheckableByLanguage(firstCodePoint, script) + && '\'' != firstCodePoint) return true; + + // Filter contents + final int length = text.length(); + int letterCount = 0; + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { + final int codePoint = text.codePointAt(i); + // Any word containing a '@' is probably an e-mail address + // Any word containing a '/' is probably either an ad-hoc combination of two + // words or a URI - in either case we don't want to spell check that + if ('@' == codePoint || '/' == codePoint) return true; + if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; + } + // Guestimate heuristic: perform spell checking if at least 3/4 of the characters + // in this word are letters + return (letterCount * 4 < length * 3); + } + + private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote( + TextInfo ti, SentenceSuggestionsInfo ssi) { + final String typedText = ti.getText(); + if (!typedText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) { + return null; + } + final int N = ssi.getSuggestionsCount(); + final ArrayList additionalOffsets = new ArrayList(); + final ArrayList additionalLengths = new ArrayList(); + final ArrayList additionalSuggestionsInfos = + new ArrayList(); + String currentWord = null; + for (int i = 0; i < N; ++i) { + final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i); + final int flags = si.getSuggestionsAttributes(); + if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) { + continue; + } + final int offset = ssi.getOffsetAt(i); + final int length = ssi.getLengthAt(i); + final String subText = typedText.substring(offset, offset + length); + final String prevWord = currentWord; + currentWord = subText; + if (!subText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) { + continue; + } + final String[] splitTexts = subText.split(AndroidSpellCheckerService.SINGLE_QUOTE, -1); + if (splitTexts == null || splitTexts.length <= 1) { + continue; + } + final int splitNum = splitTexts.length; + for (int j = 0; j < splitNum; ++j) { + final String splitText = splitTexts[j]; + if (TextUtils.isEmpty(splitText)) { + continue; + } + if (mSuggestionsCache.getSuggestionsFromCache( + splitText, prevWord) == null) { + continue; + } + final int newLength = splitText.length(); + // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO + final int newFlags = 0; + final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY); + newSi.setCookieAndSequence(si.getCookie(), si.getSequence()); + if (DBG) { + Log.d(TAG, "Override and remove old span over: " + + splitText + ", " + offset + "," + newLength); + } + additionalOffsets.add(offset); + additionalLengths.add(newLength); + additionalSuggestionsInfos.add(newSi); + } + } + final int additionalSize = additionalOffsets.size(); + if (additionalSize <= 0) { + return null; + } + final int suggestionsSize = N + additionalSize; + final int[] newOffsets = new int[suggestionsSize]; + final int[] newLengths = new int[suggestionsSize]; + final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize]; + int i; + for (i = 0; i < N; ++i) { + newOffsets[i] = ssi.getOffsetAt(i); + newLengths[i] = ssi.getLengthAt(i); + newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i); + } + for (; i < suggestionsSize; ++i) { + newOffsets[i] = additionalOffsets.get(i - N); + newLengths[i] = additionalLengths.get(i - N); + newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N); + } + return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths); + } + + @Override + public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple( + TextInfo[] textInfos, int suggestionsLimit) { + final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple( + textInfos, suggestionsLimit); + if (retval == null || retval.length != textInfos.length) { + return retval; + } + for (int i = 0; i < retval.length; ++i) { + final SentenceSuggestionsInfo tempSsi = + fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]); + if (tempSsi != null) { + retval[i] = tempSsi; + } + } + return retval; + } + + @Override + public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, + int suggestionsLimit, boolean sequentialWords) { + final int length = textInfos.length; + final SuggestionsInfo[] retval = new SuggestionsInfo[length]; + for (int i = 0; i < length; ++i) { + final String prevWord; + if (sequentialWords && i > 0) { + final String prevWordCandidate = textInfos[i - 1].getText(); + // Note that an empty string would be used to indicate the initial word + // in the future. + prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate; + } else { + prevWord = null; + } + retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit); + retval[i].setCookieAndSequence( + textInfos[i].getCookie(), textInfos[i].getSequence()); + } + return retval; + } + + // Note : this must be reentrant + /** + * Gets a list of suggestions for a specific string. This returns a list of possible + * corrections for the text passed as an argument. It may split or group words, and + * even perform grammatical analysis. + */ + @Override + public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, + final int suggestionsLimit) { + return onGetSuggestions(textInfo, null, suggestionsLimit); + } + + private SuggestionsInfo onGetSuggestions( + final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { + try { + final String inText = textInfo.getText(); + final SuggestionsParams cachedSuggestionsParams = + mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); + if (cachedSuggestionsParams != null) { + if (DBG) { + Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); + } + return new SuggestionsInfo( + cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); + } + + if (shouldFilterOut(inText, mScript)) { + DictAndProximity dictInfo = null; + try { + dictInfo = mDictionaryPool.takeOrGetNull(); + if (null == dictInfo) { + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + return dictInfo.mDictionary.isValidWord(inText) + ? AndroidSpellCheckerService.getInDictEmptySuggestions() + : AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } finally { + if (null != dictInfo) { + if (!mDictionaryPool.offer(dictInfo)) { + Log.e(TAG, "Can't re-insert a dictionary into its pool"); + } + } + } + } + final String text = inText.replaceAll( + AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); + + // TODO: Don't gather suggestions if the limit is <= 0 unless necessary + //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, + //mService.mSuggestionThreshold, mService.mRecommendedThreshold, + //suggestionsLimit); + final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( + text, suggestionsLimit); + final WordComposer composer = new WordComposer(); + final int length = text.length(); + for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { + final int codePoint = text.codePointAt(i); + // The getXYForCodePointAndScript method returns (Y << 16) + X + final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( + codePoint, mScript); + if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { + composer.add(codePoint, WordComposer.NOT_A_COORDINATE, + WordComposer.NOT_A_COORDINATE); + } else { + composer.add(codePoint, xy & 0xFFFF, xy >> 16); + } + } + + final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text); + boolean isInDict = true; + DictAndProximity dictInfo = null; + try { + dictInfo = mDictionaryPool.takeOrGetNull(); + if (null == dictInfo) { + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + final ArrayList suggestions = dictInfo.mDictionary.getWords( + composer, prevWord, dictInfo.mProximityInfo); + for (final SuggestedWordInfo suggestion : suggestions) { + final String suggestionStr = suggestion.mWord.toString(); + suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, + suggestionStr.length(), suggestion.mScore); + } + isInDict = dictInfo.mDictionary.isValidWord(text); + if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) { + // We want to test the word again if it's all caps or first caps only. + // If it's fully down, we already tested it, if it's mixed case, we don't + // want to test a lowercase version of it. + isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); + } + } finally { + if (null != dictInfo) { + if (!mDictionaryPool.offer(dictInfo)) { + Log.e(TAG, "Can't re-insert a dictionary into its pool"); + } + } + } + + final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( + capitalizeType, mLocale); + + if (DBG) { + Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " + + suggestionsLimit); + Log.i(TAG, "IsInDict = " + isInDict); + Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); + Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); + if (null != result.mSuggestions) { + for (String suggestion : result.mSuggestions) { + Log.i(TAG, suggestion); + } + } + } + + final int flags = + (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY + : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) + | (result.mHasRecommendedSuggestions + ? SuggestionsInfoCompatUtils + .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() + : 0); + final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); + mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); + return retval; + } catch (RuntimeException e) { + // Don't kill the keyboard if there is a bug in the spell checker + if (DBG) { + throw e; + } else { + Log.e(TAG, "Exception while spellcheking: " + e); + return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); + } + } + } +}