// Source: LatinIME/java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerSession.java
// (420 lines, 19 KiB, Java)

/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.spellcheck;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SentenceSuggestionsInfo;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;
import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.latin.LocaleUtils;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
import java.util.ArrayList;
import java.util.Locale;
public class AndroidSpellCheckerSession extends Session {
// Tag used for every Log call made by this class.
private static final String TAG = AndroidSpellCheckerSession.class.getSimpleName();
// Compile-time debug switch: when true, extra logging is emitted and runtime exceptions
// caught while spell checking are rethrown instead of being swallowed.
private static final boolean DBG = false;
// Shared empty suggestions array, used when building a SuggestionsInfo that carries no
// suggestions.
private final static String[] EMPTY_STRING_ARRAY = new String[0];
// Immutable, but need the locale which is not available in the constructor yet.
// Assigned in onCreate().
private DictionaryPool mDictionaryPool;
// Likewise: effectively immutable, assigned in onCreate().
private Locale mLocale;
// Cache this for performance. Derived from mLocale in onCreate().
private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
// The service that created this session; it provides the dictionary pool and the
// suggestions gatherers.
private final AndroidSpellCheckerService mService;
// Per-session cache of previously computed suggestions, keyed by word (and previous word).
private final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
/**
 * Simple holder for what the cache remembers about a checked word: the suggestions
 * that were produced and the SuggestionsInfo result flags that went with them.
 * The array is stored as passed in; no copy is made.
 */
private static class SuggestionsParams {
    public final String[] mSuggestions;
    public final int mFlags;

    public SuggestionsParams(final String[] suggestions, final int flags) {
        this.mSuggestions = suggestions;
        this.mFlags = flags;
    }
}
/**
 * Small LRU cache of spell checking results. Entries are keyed by the checked word,
 * optionally combined with the word that preceded it.
 */
private static class SuggestionsCache {
    // Maximum number of entries kept in the LRU cache.
    private static final int MAX_CACHE_SIZE = 50;
    // Separator placed between the query and the previous word inside a cache key.
    private static final char CHAR_DELIMITER = '\uFFFC';

    private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
            new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE);

    /**
     * Builds the cache key. When either the query or the previous word is empty, the
     * key is the query alone; otherwise it is query + delimiter + previous word.
     */
    // TODO: Support n-gram input
    private static String generateKey(String query, String prevWord) {
        final boolean hasQueryAndContext =
                !TextUtils.isEmpty(query) && !TextUtils.isEmpty(prevWord);
        return hasQueryAndContext ? query + CHAR_DELIMITER + prevWord : query;
    }

    /**
     * Looks up the cached result for this query / previous word pair.
     *
     * @return the cached parameters, or null when there is no entry
     */
    // TODO: Support n-gram input
    public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) {
        final String key = generateKey(query, prevWord);
        return mUnigramSuggestionsInfoCache.get(key);
    }

    /**
     * Stores a result in the cache. Nothing is stored when the suggestions array is
     * null or when the query is empty.
     */
    // TODO: Support n-gram input
    public void putSuggestionsToCache(
            String query, String prevWord, String[] suggestions, int flags) {
        final boolean cacheable = suggestions != null && !TextUtils.isEmpty(query);
        if (cacheable) {
            final SuggestionsParams params = new SuggestionsParams(suggestions, flags);
            mUnigramSuggestionsInfoCache.put(generateKey(query, prevWord), params);
        }
    }
}
/**
 * Creates a session attached to the given service. Locale-dependent state is not
 * initialized here; that happens in {@link #onCreate()}.
 *
 * @param service the spell checker service this session belongs to
 */
AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
    this.mService = service;
}
/**
 * Initializes the locale-dependent members: the dictionary pool obtained from the
 * service for this session's locale, the parsed locale, and the script derived from it.
 */
@Override
public void onCreate() {
    final String sessionLocaleString = getLocale();
    mDictionaryPool = mService.getDictionaryPool(sessionLocaleString);
    final Locale sessionLocale = LocaleUtils.constructLocaleFromString(sessionLocaleString);
    mLocale = sessionLocale;
    mScript = AndroidSpellCheckerService.getScriptFromLocale(sessionLocale);
}
/**
 * Tells whether a code point is a letter that this spell checker can handle for the
 * given script.
 *
 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
 * and are limited to EFIGS languages and Russian. Consequently only Latin and Cyrillic
 * letters are accepted, which excludes CJK, Arabic and Hebrew characters among others.
 *
 * @param codePoint the code point to test
 * @param script one of the SCRIPT_* constants of AndroidSpellCheckerService
 * @return true if the code point is a letter in the range covered by the script
 * @throws RuntimeException if the script is neither SCRIPT_LATIN nor SCRIPT_CYRILLIC
 */
private static boolean isLetterCheckableByLanguage(final int codePoint,
        final int script) {
    if (AndroidSpellCheckerService.SCRIPT_LATIN == script) {
        // The supported Latin script dictionaries (EFIGS) only use characters from the
        // C0, C1, Latin Extended A and B and IPA extensions blocks, which sit
        // back-to-back up to 0x2AF. Anything in 0-0x3F is rejected by isLetter anyway,
        // so a single upper bound is enough.
        if (codePoint > 0x2AF) {
            return false;
        }
        return Character.isLetter(codePoint);
    }
    if (AndroidSpellCheckerService.SCRIPT_CYRILLIC == script) {
        // Modern Cyrillic lives in the 0x400~0x52F range. The Cyrillic characters found
        // higher up in Unicode are archaic and absent from the dictionary.
        final boolean inCyrillicRange = codePoint >= 0x400 && codePoint <= 0x52F;
        return inCyrillicRange && Character.isLetter(codePoint);
    }
    // Should never come here
    throw new RuntimeException("Impossible value of script: " + script);
}
/**
 * Decides whether a string should be excluded from regular spell checking.
 *
 * This loosely matches URLs, e-mail addresses, numbers and symbols. It also uses the
 * script identifier to quickly reject text made of characters from scripts this
 * checker does not know, so that such words are not systematically underlined.
 *
 * @param text the string to evaluate.
 * @param script the identifier for the script this spell checker recognizes; one of the
 *        SCRIPT_* constants of AndroidSpellCheckerService
 * @return true if we should filter this text out, false otherwise
 */
private static boolean shouldFilterOut(final String text, final int script) {
    // Empty and single-character strings are never checked.
    if (TextUtils.isEmpty(text) || text.length() <= 1) {
        return true;
    }

    // TODO: check if an equivalent processing can't be done more quickly with a
    // compiled regexp.
    // A checkable word has to begin with a letter of the script or with an apostrophe.
    final int leadingCodePoint = text.codePointAt(0);
    if (!isLetterCheckableByLanguage(leadingCodePoint, script)
            && '\'' != leadingCodePoint) {
        return true;
    }

    // Walk the text one code point at a time, counting the checkable letters.
    final int charCount = text.length();
    int checkableLetters = 0;
    int index = 0;
    while (index < charCount) {
        final int codePoint = text.codePointAt(index);
        // A '@' most likely means an e-mail address. A '/' most likely means a URI or
        // an ad-hoc combination of two words. Neither should be spell checked.
        if ('@' == codePoint || '/' == codePoint) {
            return true;
        }
        if (isLetterCheckableByLanguage(codePoint, script)) {
            checkableLetters++;
        }
        index = text.offsetByCodePoints(index, 1);
    }

    // Heuristic: the word is worth checking only if at least 3/4 of its characters
    // are letters.
    final boolean mostlyLetters = checkableLetters * 4 >= charCount * 3;
    return !mostlyLetters;
}
/**
 * Builds a corrected sentence result for text containing single quotes.
 *
 * For every word of the sentence that was found in the dictionary and contains a single
 * quote, the word is split around the quotes. Each non-empty piece that has an entry in
 * the suggestions cache gets an additional SuggestionsInfo with neither
 * RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO set, positioned exactly
 * over that piece. Presumably the client uses it to drop a span it had placed over the
 * piece earlier - TODO confirm against the framework.
 *
 * @param ti the text that was checked
 * @param ssi the sentence-level result computed for that text; may be null
 * @return a new SentenceSuggestionsInfo holding the original entries followed by the
 *         additional ones, or null when there is nothing to add
 */
private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote(
        TextInfo ti, SentenceSuggestionsInfo ssi) {
    // NOTE(review): the sentence-level result is believed to be null for text in which
    // no word was found; in that case there is nothing to fix.
    if (ssi == null) {
        return null;
    }
    final String typedText = ti.getText();
    if (!typedText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) {
        return null;
    }
    final int N = ssi.getSuggestionsCount();
    final int quoteLength = AndroidSpellCheckerService.SINGLE_QUOTE.length();
    final ArrayList<Integer> additionalOffsets = new ArrayList<Integer>();
    final ArrayList<Integer> additionalLengths = new ArrayList<Integer>();
    final ArrayList<SuggestionsInfo> additionalSuggestionsInfos =
            new ArrayList<SuggestionsInfo>();
    String currentWord = null;
    for (int i = 0; i < N; ++i) {
        final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i);
        final int flags = si.getSuggestionsAttributes();
        // Only words recognized as valid are of interest here.
        if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) {
            continue;
        }
        final int offset = ssi.getOffsetAt(i);
        final int length = ssi.getLengthAt(i);
        final String subText = typedText.substring(offset, offset + length);
        // The previous word is the last in-dictionary word seen before this one.
        final String prevWord = currentWord;
        currentWord = subText;
        if (!subText.contains(AndroidSpellCheckerService.SINGLE_QUOTE)) {
            continue;
        }
        // A negative limit keeps trailing empty pieces, so every quote is accounted for.
        final String[] splitTexts = subText.split(AndroidSpellCheckerService.SINGLE_QUOTE, -1);
        if (splitTexts == null || splitTexts.length <= 1) {
            continue;
        }
        // Start of the current piece within the typed text. It is advanced past each
        // piece and the quote that follows it, so that every additional entry covers
        // its own piece rather than the beginning of the whole word.
        int nextPieceOffset = offset;
        for (final String splitText : splitTexts) {
            final int pieceOffset = nextPieceOffset;
            nextPieceOffset += splitText.length() + quoteLength;
            if (TextUtils.isEmpty(splitText)) {
                continue;
            }
            if (mSuggestionsCache.getSuggestionsFromCache(
                    splitText, prevWord) == null) {
                continue;
            }
            final int newLength = splitText.length();
            // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO
            final int newFlags = 0;
            final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY);
            newSi.setCookieAndSequence(si.getCookie(), si.getSequence());
            if (DBG) {
                Log.d(TAG, "Override and remove old span over: "
                        + splitText + ", " + pieceOffset + "," + newLength);
            }
            additionalOffsets.add(pieceOffset);
            additionalLengths.add(newLength);
            additionalSuggestionsInfos.add(newSi);
        }
    }
    final int additionalSize = additionalOffsets.size();
    if (additionalSize <= 0) {
        return null;
    }
    // Original entries first, additional entries appended after them.
    final int suggestionsSize = N + additionalSize;
    final int[] newOffsets = new int[suggestionsSize];
    final int[] newLengths = new int[suggestionsSize];
    final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize];
    for (int i = 0; i < N; ++i) {
        newOffsets[i] = ssi.getOffsetAt(i);
        newLengths[i] = ssi.getLengthAt(i);
        newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i);
    }
    for (int j = 0; j < additionalSize; ++j) {
        newOffsets[N + j] = additionalOffsets.get(j);
        newLengths[N + j] = additionalLengths.get(j);
        newSuggestionsInfos[N + j] = additionalSuggestionsInfos.get(j);
    }
    return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths);
}
/**
 * Runs the default sentence-level spell checking, then post-processes each result to
 * account for words containing single quotes.
 *
 * @param textInfos the sentences to check
 * @param suggestionsLimit the maximum number of suggestions per word
 * @return the results of the default implementation, with an entry replaced whenever
 *         the single quote fix-up produced a new result for it
 */
@Override
public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple(
        TextInfo[] textInfos, int suggestionsLimit) {
    final SentenceSuggestionsInfo[] results =
            super.onGetSentenceSuggestionsMultiple(textInfos, suggestionsLimit);
    // Results can only be matched with their input when both arrays line up.
    if (results == null || results.length != textInfos.length) {
        return results;
    }
    for (int index = 0; index < results.length; ++index) {
        final SentenceSuggestionsInfo fixed =
                fixWronglyInvalidatedWordWithSingleQuote(textInfos[index], results[index]);
        if (fixed == null) {
            // Nothing to fix for this sentence; keep the default result.
            continue;
        }
        results[index] = fixed;
    }
    return results;
}
/**
 * Checks several words at once. When the words are sequential, each word after the
 * first is checked with the text of the word before it as context.
 *
 * @param textInfos the words to check
 * @param suggestionsLimit the maximum number of suggestions per word
 * @param sequentialWords true if the words appear one after another in the text
 * @return one SuggestionsInfo per input word, carrying that word's cookie and sequence
 */
@Override
public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos,
        int suggestionsLimit, boolean sequentialWords) {
    final int count = textInfos.length;
    final SuggestionsInfo[] results = new SuggestionsInfo[count];
    for (int index = 0; index < count; ++index) {
        String prevWord = null;
        if (sequentialWords && index > 0) {
            final String candidate = textInfos[index - 1].getText();
            // Note that an empty string would be used to indicate the initial word
            // in the future.
            if (!TextUtils.isEmpty(candidate)) {
                prevWord = candidate;
            }
        }
        final TextInfo current = textInfos[index];
        final SuggestionsInfo info = onGetSuggestions(current, prevWord, suggestionsLimit);
        info.setCookieAndSequence(current.getCookie(), current.getSequence());
        results[index] = info;
    }
    return results;
}
// Note : this must be reentrant
/**
 * Gets a list of suggestions for a specific string. This returns a list of possible
 * corrections for the text passed as an argument. It may split or group words, and
 * even perform grammatical analysis.
 *
 * The word is checked on its own, without any previous word as context.
 *
 * @param textInfo the text to check
 * @param suggestionsLimit the maximum number of suggestions to return
 * @return the spell checking result for the text
 */
@Override
public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
        final int suggestionsLimit) {
    final String noPreviousWord = null;
    return onGetSuggestions(textInfo, noPreviousWord, suggestionsLimit);
}
/**
 * Spell checks one word, optionally using the previous word as context.
 *
 * The steps are: return the cached result if there is one; for text that
 * shouldFilterOut() rejects, only report whether the dictionary knows it as is;
 * otherwise normalize apostrophes, gather suggestions from a pooled dictionary, decide
 * whether the word is valid (retrying in lower case for capitalized words), cache the
 * result and return it.
 *
 * Runtime exceptions are caught so that a bug in the spell checker does not take the
 * keyboard down; unless DBG is set, the failure is logged and an empty "not in
 * dictionary" result is returned.
 *
 * @param textInfo the text to check
 * @param prevWord the word preceding the text, or null if there is none
 * @param suggestionsLimit the maximum number of suggestions to return
 * @return the spell checking result for the text
 */
private SuggestionsInfo onGetSuggestions(
        final TextInfo textInfo, final String prevWord, final int suggestionsLimit) {
    try {
        final String inText = textInfo.getText();
        final SuggestionsParams cachedSuggestionsParams =
                mSuggestionsCache.getSuggestionsFromCache(inText, prevWord);
        if (cachedSuggestionsParams != null) {
            if (DBG) {
                Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
            }
            return new SuggestionsInfo(
                    cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
        }

        if (shouldFilterOut(inText, mScript)) {
            // Text that does not look like a checkable word gets no suggestions; only
            // report whether the dictionary happens to know it verbatim.
            DictAndProximity dictInfo = null;
            try {
                dictInfo = mDictionaryPool.takeOrGetNull();
                if (null == dictInfo) {
                    return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
                }
                return dictInfo.mDictionary.isValidWord(inText)
                        ? AndroidSpellCheckerService.getInDictEmptySuggestions()
                        : AndroidSpellCheckerService.getNotInDictEmptySuggestions();
            } finally {
                // Always hand the dictionary back to the pool.
                if (null != dictInfo) {
                    if (!mDictionaryPool.offer(dictInfo)) {
                        Log.e(TAG, "Can't re-insert a dictionary into its pool");
                    }
                }
            }
        }

        final String text = inText.replaceAll(
                AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);

        // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
        final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
                text, suggestionsLimit);

        // Feed the word to the composer one code point at a time, together with the
        // coordinates associated with each code point when there are any.
        final WordComposer composer = new WordComposer();
        final int length = text.length();
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            // The getXYForCodePointAndScript method returns (Y << 16) + X
            final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript(
                    codePoint, mScript);
            if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) {
                composer.add(codePoint, WordComposer.NOT_A_COORDINATE,
                        WordComposer.NOT_A_COORDINATE);
            } else {
                composer.add(codePoint, xy & 0xFFFF, xy >> 16);
            }
        }

        final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text);
        boolean isInDict = true;
        DictAndProximity dictInfo = null;
        try {
            dictInfo = mDictionaryPool.takeOrGetNull();
            if (null == dictInfo) {
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
            }
            final ArrayList<SuggestedWordInfo> suggestions = dictInfo.mDictionary.getWords(
                    composer, prevWord, dictInfo.mProximityInfo);
            for (final SuggestedWordInfo suggestion : suggestions) {
                final String suggestionStr = suggestion.mWord.toString();
                suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
                        suggestionStr.length(), suggestion.mScore);
            }
            isInDict = dictInfo.mDictionary.isValidWord(text);
            if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) {
                // We want to test the word again if it's all caps or first caps only.
                // If it's fully down, we already tested it, if it's mixed case, we don't
                // want to test a lowercase version of it.
                isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
            }
        } finally {
            // Always hand the dictionary back to the pool.
            if (null != dictInfo) {
                if (!mDictionaryPool.offer(dictInfo)) {
                    Log.e(TAG, "Can't re-insert a dictionary into its pool");
                }
            }
        }

        final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
                capitalizeType, mLocale);
        if (DBG) {
            Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
                    + suggestionsLimit);
            Log.i(TAG, "IsInDict = " + isInDict);
            Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
            Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
            if (null != result.mSuggestions) {
                for (String suggestion : result.mSuggestions) {
                    Log.i(TAG, suggestion);
                }
            }
        }

        final int flags =
                (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
                        : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
                | (result.mHasRecommendedSuggestions
                        ? SuggestionsInfoCompatUtils
                                .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                        : 0);
        final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
        // Store under the same key the lookup at the top of this method uses (the text
        // as received). Storing under the apostrophe-normalized text would make words
        // whose apostrophes were rewritten miss the cache on every later request.
        mSuggestionsCache.putSuggestionsToCache(inText, prevWord, result.mSuggestions, flags);
        return retval;
    } catch (RuntimeException e) {
        // Don't kill the keyboard if there is a bug in the spell checker
        if (DBG) {
            throw e;
        } else {
            // Pass the throwable along so the stack trace ends up in the log.
            Log.e(TAG, "Exception while spellcheking: " + e, e);
            return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
        }
    }
}
}