/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.android.inputmethod.latin.spellcheck; import android.service.textservice.SpellCheckerService.Session; import android.text.TextUtils; import android.util.Log; import android.util.LruCache; import android.view.textservice.SuggestionsInfo; import android.view.textservice.TextInfo; import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.latin.LocaleUtils; import com.android.inputmethod.latin.WordComposer; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; import java.util.ArrayList; import java.util.Locale; public abstract class AndroidWordLevelSpellCheckerSession extends Session { private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); private static final boolean DBG = false; // Immutable, but need the locale which is not available in the constructor yet private DictionaryPool mDictionaryPool; // Likewise private Locale mLocale; // Cache this for performance private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. private final AndroidSpellCheckerService mService; protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); private static class SuggestionsParams { public final String[] mSuggestions; public final int mFlags; public SuggestionsParams(String[] suggestions, int flags) { mSuggestions = suggestions; mFlags = flags; } } protected static class SuggestionsCache { private static final char CHAR_DELIMITER = '\uFFFC'; private static final int MAX_CACHE_SIZE = 50; private final LruCache mUnigramSuggestionsInfoCache = new LruCache(MAX_CACHE_SIZE); // TODO: Support n-gram input private static String generateKey(String query, String prevWord) { if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { return query; } return query + CHAR_DELIMITER + prevWord; } // TODO: Support n-gram input public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); } // TODO: Support n-gram input public void putSuggestionsToCache( String query, String prevWord, String[] suggestions, int flags) { if (suggestions == null || TextUtils.isEmpty(query)) { return; } mUnigramSuggestionsInfoCache.put( generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); } } AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { mService = service; } @Override public void onCreate() { final String localeString = getLocale(); mDictionaryPool = mService.getDictionaryPool(localeString); mLocale = LocaleUtils.constructLocaleFromString(localeString); mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); } /* * Returns whether the code point is a letter that makes sense for the specified * locale for this spell checker. * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml * and is limited to EFIGS languages and Russian. * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. */ private static boolean isLetterCheckableByLanguage(final int codePoint, final int script) { switch (script) { case AndroidSpellCheckerService.SCRIPT_LATIN: // Our supported latin script dictionaries (EFIGS) at the moment only include // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, // so the below is a very efficient way to test for it. As for the 0-0x3F, it's // excluded from isLetter anyway. return codePoint <= 0x2AF && Character.isLetter(codePoint); case AndroidSpellCheckerService.SCRIPT_CYRILLIC: // All Cyrillic characters are in the 400~52F block. There are some in the upper // Unicode range, but they are archaic characters that are not used in modern // russian and are not used by our dictionary. return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); default: // Should never come here throw new RuntimeException("Impossible value of script: " + script); } } /** * Finds out whether a particular string should be filtered out of spell checking. * * This will loosely match URLs, numbers, symbols. To avoid always underlining words that * we know we will never recognize, this accepts a script identifier that should be one * of the SCRIPT_* constants defined above, to rule out quickly characters from very * different languages. * * @param text the string to evaluate. * @param script the identifier for the script this spell checker recognizes * @return true if we should filter this text out, false otherwise */ private static boolean shouldFilterOut(final String text, final int script) { if (TextUtils.isEmpty(text) || text.length() <= 1) return true; // TODO: check if an equivalent processing can't be done more quickly with a // compiled regexp. // Filter by first letter final int firstCodePoint = text.codePointAt(0); // Filter out words that don't start with a letter or an apostrophe if (!isLetterCheckableByLanguage(firstCodePoint, script) && '\'' != firstCodePoint) return true; // Filter contents final int length = text.length(); int letterCount = 0; for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); // Any word containing a '@' is probably an e-mail address // Any word containing a '/' is probably either an ad-hoc combination of two // words or a URI - in either case we don't want to spell check that if ('@' == codePoint || '/' == codePoint) return true; if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; } // Guestimate heuristic: perform spell checking if at least 3/4 of the characters // in this word are letters return (letterCount * 4 < length * 3); } // Note : this must be reentrant /** * Gets a list of suggestions for a specific string. This returns a list of possible * corrections for the text passed as an argument. It may split or group words, and * even perform grammatical analysis. */ @Override public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) { return onGetSuggestions(textInfo, null, suggestionsLimit); } protected SuggestionsInfo onGetSuggestions( final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { try { final String inText = textInfo.getText(); final SuggestionsParams cachedSuggestionsParams = mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); if (cachedSuggestionsParams != null) { if (DBG) { Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); } return new SuggestionsInfo( cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); } if (shouldFilterOut(inText, mScript)) { DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.pollWithDefaultTimeout(); if (null == dictInfo) { return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); } return dictInfo.mDictionary.isValidWord(inText) ? AndroidSpellCheckerService.getInDictEmptySuggestions() : AndroidSpellCheckerService.getNotInDictEmptySuggestions(); } finally { if (null != dictInfo) { if (!mDictionaryPool.offer(dictInfo)) { Log.e(TAG, "Can't re-insert a dictionary into its pool"); } } } } final String text = inText.replaceAll( AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); // TODO: Don't gather suggestions if the limit is <= 0 unless necessary //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, //mService.mSuggestionThreshold, mService.mRecommendedThreshold, //suggestionsLimit); final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( text, suggestionsLimit); final WordComposer composer = new WordComposer(); final int length = text.length(); for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); // The getXYForCodePointAndScript method returns (Y << 16) + X final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( codePoint, mScript); if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { composer.add(codePoint, WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); } else { composer.add(codePoint, xy & 0xFFFF, xy >> 16); } } final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text); boolean isInDict = true; DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.pollWithDefaultTimeout(); if (null == dictInfo) { return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); } final ArrayList suggestions = dictInfo.mDictionary.getSuggestions(composer, prevWord, dictInfo.mProximityInfo); for (final SuggestedWordInfo suggestion : suggestions) { final String suggestionStr = suggestion.mWord.toString(); suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, suggestionStr.length(), suggestion.mScore); } isInDict = dictInfo.mDictionary.isValidWord(text); if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) { // We want to test the word again if it's all caps or first caps only. // If it's fully down, we already tested it, if it's mixed case, we don't // want to test a lowercase version of it. isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); } } finally { if (null != dictInfo) { if (!mDictionaryPool.offer(dictInfo)) { Log.e(TAG, "Can't re-insert a dictionary into its pool"); } } } final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( capitalizeType, mLocale); if (DBG) { Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " + suggestionsLimit); Log.i(TAG, "IsInDict = " + isInDict); Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); if (null != result.mSuggestions) { for (String suggestion : result.mSuggestions) { Log.i(TAG, suggestion); } } } final int flags = (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) | (result.mHasRecommendedSuggestions ? SuggestionsInfoCompatUtils .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() : 0); final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); return retval; } catch (RuntimeException e) { // Don't kill the keyboard if there is a bug in the spell checker if (DBG) { throw e; } else { Log.e(TAG, "Exception while spellcheking: " + e); return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); } } } }