/* * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.android.inputmethod.latin.spellcheck; import android.content.Intent; import android.content.SharedPreferences; import android.content.res.Resources; import android.preference.PreferenceManager; import android.service.textservice.SpellCheckerService; import android.text.TextUtils; import android.util.Log; import android.view.textservice.SuggestionsInfo; import android.view.textservice.TextInfo; import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.Dictionary.WordCallback; import com.android.inputmethod.latin.DictionaryCollection; import com.android.inputmethod.latin.DictionaryFactory; import com.android.inputmethod.latin.Flag; import com.android.inputmethod.latin.LocaleUtils; import com.android.inputmethod.latin.R; import com.android.inputmethod.latin.StringUtils; import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary; import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; import com.android.inputmethod.latin.WhitelistDictionary; import com.android.inputmethod.latin.WordComposer; import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.Locale; import java.util.Map; import java.util.TreeMap; /** * Service for spell checking, using LatinIME's dictionaries and mechanisms. */ public class AndroidSpellCheckerService extends SpellCheckerService implements SharedPreferences.OnSharedPreferenceChangeListener { private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); private static final boolean DBG = false; private static final int POOL_SIZE = 2; public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case private static final int CAPITALIZE_FIRST = 1; // First only private static final int CAPITALIZE_ALL = 2; // All caps private final static String[] EMPTY_STRING_ARRAY = new String[0]; private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY; static { // See BinaryDictionary.java for an explanation of these flags // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the // current dictionary configuration - for example, consider the UMLAUT flag // so that it will be turned on for German dictionaries and off for others. USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS, BinaryDictionary.ALL_CONFIG_FLAGS.length + 1); USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] = BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE; } private Map mDictionaryPools = Collections.synchronizedMap(new TreeMap()); private Map mUserDictionaries = Collections.synchronizedMap(new TreeMap()); private Map mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap()); private SynchronouslyLoadedContactsDictionary mContactsDictionary; // The threshold for a candidate to be offered as a suggestion. private double mSuggestionThreshold; // The threshold for a suggestion to be considered "recommended". private double mRecommendedThreshold; // Whether to use the contacts dictionary private boolean mUseContactsDictionary; private final Object mUseContactsLock = new Object(); private final HashSet> mDictionaryCollectionsList = new HashSet>(); public static final int SCRIPT_LATIN = 0; public static final int SCRIPT_CYRILLIC = 1; private static final TreeMap mLanguageToScript; static { // List of the supported languages and their associated script. We won't check // words written in another script than the selected script, because we know we // don't have those in our dictionary so we will underline everything and we // will never have any suggestions, so it makes no sense checking them. mLanguageToScript = new TreeMap(); mLanguageToScript.put("en", SCRIPT_LATIN); mLanguageToScript.put("fr", SCRIPT_LATIN); mLanguageToScript.put("de", SCRIPT_LATIN); mLanguageToScript.put("nl", SCRIPT_LATIN); mLanguageToScript.put("cs", SCRIPT_LATIN); mLanguageToScript.put("es", SCRIPT_LATIN); mLanguageToScript.put("it", SCRIPT_LATIN); mLanguageToScript.put("ru", SCRIPT_CYRILLIC); } @Override public void onCreate() { super.onCreate(); mSuggestionThreshold = Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value)); mRecommendedThreshold = Double.parseDouble(getString(R.string.spellchecker_recommended_threshold_value)); final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); prefs.registerOnSharedPreferenceChangeListener(this); onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); } private static int getScriptFromLocale(final Locale locale) { final Integer script = mLanguageToScript.get(locale.getLanguage()); if (null == script) { throw new RuntimeException("We have been called with an unsupported language: \"" + locale.getLanguage() + "\". Framework bug?"); } return script; } @Override public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { if (!PREF_USE_CONTACTS_KEY.equals(key)) return; synchronized(mUseContactsLock) { mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); if (mUseContactsDictionary) { startUsingContactsDictionaryLocked(); } else { stopUsingContactsDictionaryLocked(); } } } private void startUsingContactsDictionaryLocked() { if (null == mContactsDictionary) { mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); } final Iterator> iterator = mDictionaryCollectionsList.iterator(); while (iterator.hasNext()) { final WeakReference dictRef = iterator.next(); final DictionaryCollection dict = dictRef.get(); if (null == dict) { iterator.remove(); } else { dict.addDictionary(mContactsDictionary); } } } private void stopUsingContactsDictionaryLocked() { if (null == mContactsDictionary) return; final SynchronouslyLoadedContactsDictionary contactsDict = mContactsDictionary; mContactsDictionary = null; final Iterator> iterator = mDictionaryCollectionsList.iterator(); while (iterator.hasNext()) { final WeakReference dictRef = iterator.next(); final DictionaryCollection dict = dictRef.get(); if (null == dict) { iterator.remove(); } else { dict.removeDictionary(contactsDict); } } contactsDict.close(); } @Override public Session createSession() { return new AndroidSpellCheckerSession(this); } private static SuggestionsInfo getNotInDictEmptySuggestions() { return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); } private static SuggestionsInfo getInDictEmptySuggestions() { return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, EMPTY_STRING_ARRAY); } private static class SuggestionsGatherer implements WordCallback { public static class Result { public final String[] mSuggestions; public final boolean mHasRecommendedSuggestions; public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) { mSuggestions = gatheredSuggestions; mHasRecommendedSuggestions = hasRecommendedSuggestions; } } private final ArrayList mSuggestions; private final int[] mScores; private final String mOriginalText; private final double mSuggestionThreshold; private final double mRecommendedThreshold; private final int mMaxLength; private int mLength = 0; // The two following attributes are only ever filled if the requested max length // is 0 (or less, which is treated the same). private String mBestSuggestion = null; private int mBestScore = Integer.MIN_VALUE; // As small as possible SuggestionsGatherer(final String originalText, final double suggestionThreshold, final double recommendedThreshold, final int maxLength) { mOriginalText = originalText; mSuggestionThreshold = suggestionThreshold; mRecommendedThreshold = recommendedThreshold; mMaxLength = maxLength; mSuggestions = new ArrayList(maxLength + 1); mScores = new int[mMaxLength]; } @Override synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, int dicTypeId, int dataType) { final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score); // binarySearch returns the index if the element exists, and - - 1 // if it doesn't. See documentation for binarySearch. final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; if (insertIndex == 0 && mLength >= mMaxLength) { // In the future, we may want to keep track of the best suggestion score even if // we are asked for 0 suggestions. In this case, we can use the following // (tested) code to keep it: // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) // then we need to keep track of the best suggestion in mBestScore and // mBestSuggestion. This is so that we know whether the best suggestion makes // the score cutoff, since we need to know that to return a meaningful // looksLikeTypo. // if (0 >= mMaxLength) { // if (score > mBestScore) { // mBestScore = score; // mBestSuggestion = new String(word, wordOffset, wordLength); // } // } return true; } if (insertIndex >= mMaxLength) { // We found a suggestion, but its score is too weak to be kept considering // the suggestion limit. return true; } // Compute the normalized score and skip this word if it's normalized score does not // make the threshold. final String wordString = new String(word, wordOffset, wordLength); final double normalizedScore = BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); if (normalizedScore < mSuggestionThreshold) { if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); return true; } if (mLength < mMaxLength) { final int copyLen = mLength - insertIndex; ++mLength; System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); mSuggestions.add(insertIndex, wordString); } else { System.arraycopy(mScores, 1, mScores, 0, insertIndex); mSuggestions.add(insertIndex, wordString); mSuggestions.remove(0); } mScores[insertIndex] = score; return true; } public Result getResults(final int capitalizeType, final Locale locale) { final String[] gatheredSuggestions; final boolean hasRecommendedSuggestions; if (0 == mLength) { // Either we found no suggestions, or we found some BUT the max length was 0. // If we found some mBestSuggestion will not be null. If it is null, then // we found none, regardless of the max length. if (null == mBestSuggestion) { gatheredSuggestions = null; hasRecommendedSuggestions = false; } else { gatheredSuggestions = EMPTY_STRING_ARRAY; final double normalizedScore = BinaryDictionary.calcNormalizedScore( mOriginalText, mBestSuggestion, mBestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); } } else { if (DBG) { if (mLength != mSuggestions.size()) { Log.e(TAG, "Suggestion size is not the same as stored mLength"); } for (int i = mLength - 1; i >= 0; --i) { Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); } } Collections.reverse(mSuggestions); StringUtils.removeDupes(mSuggestions); if (CAPITALIZE_ALL == capitalizeType) { for (int i = 0; i < mSuggestions.size(); ++i) { // get(i) returns a CharSequence which is actually a String so .toString() // should return the same object. mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); } } else if (CAPITALIZE_FIRST == capitalizeType) { for (int i = 0; i < mSuggestions.size(); ++i) { // Likewise mSuggestions.set(i, StringUtils.toTitleCase( mSuggestions.get(i).toString(), locale)); } } // This returns a String[], while toArray() returns an Object[] which cannot be cast // into a String[]. gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); final int bestScore = mScores[mLength - 1]; final CharSequence bestSuggestion = mSuggestions.get(0); final double normalizedScore = BinaryDictionary.calcNormalizedScore( mOriginalText, bestSuggestion.toString(), bestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); if (DBG) { Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); Log.i(TAG, "Normalized score = " + normalizedScore + " (threshold " + mRecommendedThreshold + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); } } return new Result(gatheredSuggestions, hasRecommendedSuggestions); } } @Override public boolean onUnbind(final Intent intent) { closeAllDictionaries(); return false; } private void closeAllDictionaries() { final Map oldPools = mDictionaryPools; mDictionaryPools = Collections.synchronizedMap(new TreeMap()); final Map oldUserDictionaries = mUserDictionaries; mUserDictionaries = Collections.synchronizedMap(new TreeMap()); final Map oldWhitelistDictionaries = mWhitelistDictionaries; mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap()); for (DictionaryPool pool : oldPools.values()) { pool.close(); } for (Dictionary dict : oldUserDictionaries.values()) { dict.close(); } for (Dictionary dict : oldWhitelistDictionaries.values()) { dict.close(); } synchronized(mUseContactsLock) { if (null != mContactsDictionary) { // The synchronously loaded contacts dictionary should have been in one // or several pools, but it is shielded against multiple closing and it's // safe to call it several times. final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary; mContactsDictionary = null; dictToClose.close(); } } } private DictionaryPool getDictionaryPool(final String locale) { DictionaryPool pool = mDictionaryPools.get(locale); if (null == pool) { final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); pool = new DictionaryPool(POOL_SIZE, this, localeObject); mDictionaryPools.put(locale, pool); } return pool; } public DictAndProximity createDictAndProximity(final Locale locale) { final int script = getScriptFromLocale(locale); final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( SpellCheckerProximityInfo.getProximityForScript(script)); final Resources resources = getResources(); final int fallbackResourceId = DictionaryFactory.getMainDictionaryResourceId(resources); final DictionaryCollection dictionaryCollection = DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId, USE_FULL_EDIT_DISTANCE_FLAG_ARRAY); final String localeStr = locale.toString(); Dictionary userDictionary = mUserDictionaries.get(localeStr); if (null == userDictionary) { userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true); mUserDictionaries.put(localeStr, userDictionary); } dictionaryCollection.addDictionary(userDictionary); Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr); if (null == whitelistDictionary) { whitelistDictionary = new WhitelistDictionary(this, locale); mWhitelistDictionaries.put(localeStr, whitelistDictionary); } dictionaryCollection.addDictionary(whitelistDictionary); synchronized(mUseContactsLock) { if (mUseContactsDictionary) { if (null == mContactsDictionary) { mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); } } dictionaryCollection.addDictionary(mContactsDictionary); mDictionaryCollectionsList.add( new WeakReference(dictionaryCollection)); } return new DictAndProximity(dictionaryCollection, proximityInfo); } // This method assumes the text is not empty or null. private static int getCapitalizationType(String text) { // If the first char is not uppercase, then the word is either all lower case, // and in either case we return CAPITALIZE_NONE. if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; final int len = text.length(); int capsCount = 1; for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { if (1 != capsCount && i != capsCount) break; if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; } // We know the first char is upper case. So we want to test if either everything // else is lower case, or if everything else is upper case. If the string is // exactly one char long, then we will arrive here with capsCount 1, and this is // correct, too. if (1 == capsCount) return CAPITALIZE_FIRST; return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); } private static class AndroidSpellCheckerSession extends Session { // Immutable, but need the locale which is not available in the constructor yet private DictionaryPool mDictionaryPool; // Likewise private Locale mLocale; // Cache this for performance private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. private final AndroidSpellCheckerService mService; AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { mService = service; } @Override public void onCreate() { final String localeString = getLocale(); mDictionaryPool = mService.getDictionaryPool(localeString); mLocale = LocaleUtils.constructLocaleFromString(localeString); mScript = getScriptFromLocale(mLocale); } /* * Returns whether the code point is a letter that makes sense for the specified * locale for this spell checker. * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml * and is limited to EFIGS languages and Russian. * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. */ private static boolean isLetterCheckableByLanguage(final int codePoint, final int script) { switch (script) { case SCRIPT_LATIN: // Our supported latin script dictionaries (EFIGS) at the moment only include // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, // so the below is a very efficient way to test for it. As for the 0-0x3F, it's // excluded from isLetter anyway. return codePoint <= 0x2AF && Character.isLetter(codePoint); case SCRIPT_CYRILLIC: // All Cyrillic characters are in the 400~52F block. There are some in the upper // Unicode range, but they are archaic characters that are not used in modern // russian and are not used by our dictionary. return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); default: // Should never come here throw new RuntimeException("Impossible value of script: " + script); } } /** * Finds out whether a particular string should be filtered out of spell checking. * * This will loosely match URLs, numbers, symbols. To avoid always underlining words that * we know we will never recognize, this accepts a script identifier that should be one * of the SCRIPT_* constants defined above, to rule out quickly characters from very * different languages. * * @param text the string to evaluate. * @param script the identifier for the script this spell checker recognizes * @return true if we should filter this text out, false otherwise */ private static boolean shouldFilterOut(final String text, final int script) { if (TextUtils.isEmpty(text) || text.length() <= 1) return true; // TODO: check if an equivalent processing can't be done more quickly with a // compiled regexp. // Filter by first letter final int firstCodePoint = text.codePointAt(0); // Filter out words that don't start with a letter or an apostrophe if (!isLetterCheckableByLanguage(firstCodePoint, script) && '\'' != firstCodePoint) return true; // Filter contents final int length = text.length(); int letterCount = 0; for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); // Any word containing a '@' is probably an e-mail address // Any word containing a '/' is probably either an ad-hoc combination of two // words or a URI - in either case we don't want to spell check that if ('@' == codePoint || '/' == codePoint) return true; if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; } // Guestimate heuristic: perform spell checking if at least 3/4 of the characters // in this word are letters return (letterCount * 4 < length * 3); } // Note : this must be reentrant /** * Gets a list of suggestions for a specific string. This returns a list of possible * corrections for the text passed as an argument. It may split or group words, and * even perform grammatical analysis. */ @Override public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) { try { final String text = textInfo.getText(); if (shouldFilterOut(text, mScript)) { DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.takeOrGetNull(); if (null == dictInfo) return getNotInDictEmptySuggestions(); return dictInfo.mDictionary.isValidWord(text) ? getInDictEmptySuggestions() : getNotInDictEmptySuggestions(); } finally { if (null != dictInfo) { if (!mDictionaryPool.offer(dictInfo)) { Log.e(TAG, "Can't re-insert a dictionary into its pool"); } } } } // TODO: Don't gather suggestions if the limit is <= 0 unless necessary final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, mService.mSuggestionThreshold, mService.mRecommendedThreshold, suggestionsLimit); final WordComposer composer = new WordComposer(); final int length = text.length(); for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { final int codePoint = text.codePointAt(i); // The getXYForCodePointAndScript method returns (Y << 16) + X final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( codePoint, mScript); if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { composer.add(codePoint, WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE, null); } else { composer.add(codePoint, xy & 0xFFFF, xy >> 16, null); } } final int capitalizeType = getCapitalizationType(text); boolean isInDict = true; DictAndProximity dictInfo = null; try { dictInfo = mDictionaryPool.takeOrGetNull(); if (null == dictInfo) return getNotInDictEmptySuggestions(); dictInfo.mDictionary.getWords(composer, suggestionsGatherer, dictInfo.mProximityInfo); isInDict = dictInfo.mDictionary.isValidWord(text); if (!isInDict && CAPITALIZE_NONE != capitalizeType) { // We want to test the word again if it's all caps or first caps only. // If it's fully down, we already tested it, if it's mixed case, we don't // want to test a lowercase version of it. isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); } } finally { if (null != dictInfo) { if (!mDictionaryPool.offer(dictInfo)) { Log.e(TAG, "Can't re-insert a dictionary into its pool"); } } } final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( capitalizeType, mLocale); if (DBG) { Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " + suggestionsLimit); Log.i(TAG, "IsInDict = " + isInDict); Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); if (null != result.mSuggestions) { for (String suggestion : result.mSuggestions) { Log.i(TAG, suggestion); } } } final int flags = (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) | (result.mHasRecommendedSuggestions ? SuggestionsInfoCompatUtils .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() : 0); return new SuggestionsInfo(flags, result.mSuggestions); } catch (RuntimeException e) { // Don't kill the keyboard if there is a bug in the spell checker if (DBG) { throw e; } else { Log.e(TAG, "Exception while spellcheking: " + e); return getNotInDictEmptySuggestions(); } } } } }