/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.content.ContentResolver;
import android.content.Context;
import android.database.ContentObserver;
import android.database.Cursor;
import android.net.Uri;
import android.provider.UserDictionary;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.common.CollectionUtils;
import com.android.inputmethod.latin.common.LocaleUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.utils.ExecutorUtils;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
 * This class provides the ability to look into the system-wide "Personal dictionary". It loads
 * the data once when opened and reloads it when notified of changes to {@link UserDictionary}.
 *
 * It can be used directly to validate words or expand shortcuts, and it can be used by instances
 * of {@link PersonalLanguageModelHelper} that create language model files for a specific input
 * locale.
 *
 * Note that it is possible (hopefully rarely) that {@link #isValidWord} or
 * {@link #expandShortcut} is called before the initial load has completed. In that case no word
 * is considered valid and no shortcut is expanded.
 *
 * The caller should explicitly call {@link #close} when the object is no longer needed, in order
 * to release any resources and references to this object. A service should create this object in
 * {@link android.app.Service#onCreate} and close it in {@link android.app.Service#onDestroy}.
 */
public class PersonalDictionaryLookup implements Closeable {

    /**
     * To avoid loading too many dictionary entries in memory, we cap them at this number. If
     * that number is exceeded, the lowest-frequency items will be dropped. Note, there is no
     * explicit cap on the number of locales in every entry.
     */
    private static final int MAX_NUM_ENTRIES = 1000;

    /**
     * The delay (in milliseconds) to impose on reloads. Previously scheduled reloads will be
     * cancelled if a new reload is scheduled before the delay expires. Thus, only the last
     * reload in the series of frequent reloads will execute.
     *
     * Note, this value should be low enough to allow the "Add to dictionary" feature in the
     * TextView correction (red underline) drop-down menu to work properly in the following case:
     *
     * 1. User types OOV (out-of-vocabulary) word.
     * 2. The OOV is red-underlined.
     * 3. User selects "Add to dictionary". The red underline disappears while the OOV is
     *    in a composing span.
     * 4. The user taps space. The red underline should NOT reappear. If this value is very
     *    high and the user performs the space tap fast enough, the red underline may reappear.
     */
    @UsedForTesting
    static final int RELOAD_DELAY_MS = 200;

    @UsedForTesting
    static final Locale ANY_LOCALE = new Locale("");

    private final String mTag;
    private final ContentResolver mResolver;
    private final String mServiceName;

    /**
     * Interface to implement for classes interested in getting notified of updates.
     */
    public static interface PersonalDictionaryListener {
        public void onUpdate();
    }

    /**
     * Registered listeners. Guarded by its own monitor, because listeners are notified from
     * whichever thread performed the (re)load while they may be added or removed elsewhere.
     */
    private final Set<PersonalDictionaryListener> mListeners = new HashSet<>();

    /**
     * Registers a listener to be notified after every (re)load of the personal dictionary.
     *
     * @param listener the listener to add
     */
    public void addListener(@Nonnull final PersonalDictionaryListener listener) {
        synchronized (mListeners) {
            mListeners.add(listener);
        }
    }

    /**
     * Unregisters a previously added listener. Unknown listeners are ignored.
     *
     * @param listener the listener to remove
     */
    public void removeListener(@Nonnull final PersonalDictionaryListener listener) {
        synchronized (mListeners) {
            mListeners.remove(listener);
        }
    }

    /**
     * Broadcast the update to all the Locale-specific language models.
     */
    @UsedForTesting
    void notifyListeners() {
        // Iterate over a snapshot so that callbacks run outside the lock and a concurrent
        // add/remove cannot trigger a ConcurrentModificationException.
        final List<PersonalDictionaryListener> snapshot;
        synchronized (mListeners) {
            snapshot = new ArrayList<>(mListeners);
        }
        for (final PersonalDictionaryListener listener : snapshot) {
            listener.onUpdate();
        }
    }

    /**
     * Content observer for changes to the personal dictionary. It has the following properties:
     * 1. It spawns off a reload in another thread, after some delay.
     * 2. It cancels previously scheduled reloads, and only executes the latest.
     * 3. It may be called multiple times quickly in succession (and is in fact called so
     *    when the dictionary is edited through its settings UI, when sometimes multiple
     *    notifications are sent for the edited entry, but also for the entire dictionary).
     */
    private class PersonalDictionaryContentObserver extends ContentObserver implements Runnable {
        public PersonalDictionaryContentObserver() {
            super(null);
        }

        @Override
        public boolean deliverSelfNotifications() {
            return true;
        }

        // Support pre-API16 platforms.
        @Override
        public void onChange(boolean selfChange) {
            onChange(selfChange, null);
        }

        @Override
        public void onChange(boolean selfChange, Uri uri) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "onChange() : URI = " + uri);
            }
            // A notification that was already in flight when close() ran must not schedule
            // further reloads.
            if (mIsClosed.get()) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "onChange() : Already closed, ignoring");
                }
                return;
            }
            // Cancel (but don't interrupt) any pending reloads (except the initial load).
            final ScheduledFuture<?> previousReload = mReloadFuture;
            if (previousReload != null && !previousReload.isCancelled()
                    && !previousReload.isDone()) {
                // Note, that if already cancelled or done, this will do nothing.
                final boolean isCancelled = previousReload.cancel(false);
                if (DebugFlags.DEBUG_ENABLED) {
                    if (isCancelled) {
                        Log.d(mTag, "onChange() : Canceled previous reload request");
                    } else {
                        Log.d(mTag, "onChange() : Failed to cancel previous reload request");
                    }
                }
            }

            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "onChange() : Scheduling reload in " + RELOAD_DELAY_MS + " ms");
            }

            // Schedule a new reload after RELOAD_DELAY_MS.
            mReloadFuture = ExecutorUtils.getBackgroundExecutor(mServiceName)
                    .schedule(this, RELOAD_DELAY_MS, TimeUnit.MILLISECONDS);
        }

        @Override
        public void run() {
            loadPersonalDictionary();
        }
    }

    private final PersonalDictionaryContentObserver mPersonalDictionaryContentObserver =
            new PersonalDictionaryContentObserver();

    /**
     * Indicates that a load is in progress, so no need for another.
     */
    private final AtomicBoolean mIsLoading = new AtomicBoolean(false);

    /**
     * Indicates that this lookup object has been close()d.
     */
    private final AtomicBoolean mIsClosed = new AtomicBoolean(false);

    /**
     * We store a map from a dictionary word to the set of locales & raw string(as it appears)
     * We then iterate over the set of locales to find a match using LocaleUtils.
     */
    private volatile HashMap<String, HashMap<Locale, String>> mDictWords;

    /**
     * We store a map from a shortcut to a word for each locale.
     * Shortcuts that apply to any locale are keyed by {@link #ANY_LOCALE}.
     */
    private volatile HashMap<Locale, HashMap<String, String>> mShortcutsPerLocale;

    /**
     * The last-scheduled reload future. Saved in order to cancel a pending reload if a new one
     * is coming.
     */
    private volatile ScheduledFuture<?> mReloadFuture;

    private volatile List<DictionaryStats> mDictionaryStats;

    /**
     * @param context the context from which to obtain content resolver
     * @param serviceName the name used for the log tag and to select the background executor
     */
    public PersonalDictionaryLookup(
            @Nonnull final Context context,
            @Nonnull final String serviceName) {
        mTag = serviceName + ".Personal";

        Log.i(mTag, "create()");

        mServiceName = serviceName;
        final List<DictionaryStats> initialStats = new ArrayList<>();
        initialStats.add(new DictionaryStats(ANY_LOCALE, Dictionary.TYPE_USER, 0));
        initialStats.add(new DictionaryStats(ANY_LOCALE, Dictionary.TYPE_USER_SHORTCUT, 0));
        mDictionaryStats = initialStats;

        // Obtain a content resolver.
        mResolver = context.getContentResolver();
    }

    /**
     * @return the word and shortcut counts recorded by the most recent load (zero counts
     *         until the first load completes)
     */
    public List<DictionaryStats> getDictionaryStats() {
        return mDictionaryStats;
    }

    /**
     * Performs the initial load in the calling thread and starts observing the personal
     * dictionary for changes.
     */
    public void open() {
        Log.i(mTag, "open()");

        // Run the initial load immediately. It's possible that the first call to
        // isValidWord occurs before the dictionary has actually loaded, so it should not
        // assume that the dictionary has been loaded.
        loadPersonalDictionary();

        // Register the observer to be notified on changes to the personal dictionary and all
        // individual items.
        //
        // If the user is interacting with the Personal Dictionary settings UI, or with the
        // "Add to dictionary" drop-down option, duplicate notifications will be sent for the same
        // edit: if a new entry is added, there is a notification for the entry itself, and
        // separately for the entire dictionary. However, when used programmatically,
        // only notifications for the specific edits are sent. Thus, the observer is registered to
        // receive every possible notification, and instead has throttling logic to avoid doing too
        // many reloads.
        mResolver.registerContentObserver(
                UserDictionary.Words.CONTENT_URI,
                true /* notifyForDescendents */,
                mPersonalDictionaryContentObserver);
    }

    /**
     * To be called by the garbage collector in the off chance that the service did not clean up
     * properly. Do not rely on this getting called, and make sure close() is called explicitly.
     */
    @Override
    public void finalize() throws Throwable {
        try {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "finalize()");
            }
            close();
        } finally {
            super.finalize();
        }
    }

    /**
     * Cleans up PersonalDictionaryLookup: cancels any pending reload and unregisters the
     * observer.
     *
     * It is safe, but not advised to call this multiple times, and isValidWord would continue to
     * work, but no data will be reloaded any longer.
     */
    @Override
    public void close() {
        if (DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "close() : Unregistering content observer");
        }
        if (mIsClosed.compareAndSet(false, true)) {
            // Unregister the content observer.
            mResolver.unregisterContentObserver(mPersonalDictionaryContentObserver);
            // Drop a reload that is scheduled but has not started yet, so that nothing is
            // reloaded after close. A reload that is already running is not interrupted.
            final ScheduledFuture<?> pendingReload = mReloadFuture;
            if (pendingReload != null) {
                pendingReload.cancel(false);
            }
        }
    }

    /**
     * Returns true if the initial load has been performed.
     *
     * @return true if the initial load is successful
     */
    public boolean isLoaded() {
        return mDictWords != null && mShortcutsPerLocale != null;
    }

    /**
     * Returns the set of words defined for the given locale and more general locales.
     *
     * For example, input locale en_US uses data for en_US, en, and the global dictionary.
     *
     * Note that this method returns expanded words, not shortcuts. Shortcuts are handled
     * by {@link #getShortcutsForLocale}.
     *
     * @param inputLocale the locale to restrict for
     * @return set of words that apply to the given locale.
     */
    public Set<String> getWordsForLocale(@Nonnull final Locale inputLocale) {
        // Atomically obtain the current copy of mDictWords.
        final HashMap<String, HashMap<Locale, String>> dictWords = mDictWords;
        if (CollectionUtils.isNullOrEmpty(dictWords)) {
            return Collections.emptySet();
        }

        final Set<String> words = new HashSet<>();
        final String inputLocaleString = inputLocale.toString();
        for (final HashMap<Locale, String> localeStringMap : dictWords.values()) {
            if (CollectionUtils.isNullOrEmpty(localeStringMap)) {
                continue;
            }
            for (final Map.Entry<Locale, String> entry : localeStringMap.entrySet()) {
                final String wordLocaleString = entry.getKey().toString();
                final int match = LocaleUtils.getMatchLevel(wordLocaleString, inputLocaleString);
                if (LocaleUtils.isMatch(match)) {
                    // Report the word as the user entered it, not its lowercased key.
                    words.add(entry.getValue());
                }
            }
        }
        return words;
    }

    /**
     * Returns the set of shortcuts defined for the given locale and more general locales.
     *
     * For example, input locale en_US uses data for en_US, en, and the global dictionary.
     *
     * Note that this method returns shortcut keys, not expanded words. Words are handled
     * by {@link #getWordsForLocale}.
     *
     * @param inputLocale the locale to restrict for
     * @return set of shortcuts that apply to the given locale.
     */
    public Set<String> getShortcutsForLocale(@Nonnull final Locale inputLocale) {
        // Atomically obtain the current copy of mShortcutsPerLocale.
        final Map<Locale, HashMap<String, String>> shortcutsPerLocale = mShortcutsPerLocale;
        if (CollectionUtils.isNullOrEmpty(shortcutsPerLocale)) {
            return Collections.emptySet();
        }

        final Set<String> shortcuts = new HashSet<>();
        if (!TextUtils.isEmpty(inputLocale.getCountry())) {
            // First look for the country-specific shortcut: en_US, en_UK, fr_FR, etc.
            final Map<String, String> countryShortcuts = shortcutsPerLocale.get(inputLocale);
            if (!CollectionUtils.isNullOrEmpty(countryShortcuts)) {
                shortcuts.addAll(countryShortcuts.keySet());
            }
        }

        // Next look for the language-specific shortcut: en, fr, etc.
        final Locale languageOnlyLocale =
                LocaleUtils.constructLocaleFromString(inputLocale.getLanguage());
        final Map<String, String> languageShortcuts = shortcutsPerLocale.get(languageOnlyLocale);
        if (!CollectionUtils.isNullOrEmpty(languageShortcuts)) {
            shortcuts.addAll(languageShortcuts.keySet());
        }

        // If all else fails, look for a global shortcut.
        final Map<String, String> globalShortcuts = shortcutsPerLocale.get(ANY_LOCALE);
        if (!CollectionUtils.isNullOrEmpty(globalShortcuts)) {
            shortcuts.addAll(globalShortcuts.keySet());
        }

        return shortcuts;
    }

    /**
     * Determines if the given word is a valid word in the given locale based on the dictionary.
     * It tries hard to find a match: for example, casing is ignored and if the word is present in a
     * more general locale (e.g. en or all locales), and isValidWord is asking for a more specific
     * locale (e.g. en_US), it will be considered a match.
     *
     * @param word the word to match
     * @param inputLocale the locale in which to match the word
     * @return true iff the word has been matched for this locale in the dictionary.
     */
    public boolean isValidWord(@Nonnull final String word, @Nonnull final Locale inputLocale) {
        if (!isLoaded()) {
            // This is a corner case in the event the initial load of the dictionary has not
            // completed. In that case, we assume the word is not a valid word in the dictionary.
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "isValidWord() : Initial load not complete");
            }
            return false;
        }

        if (DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "isValidWord() : Word [" + word + "] in Locale [" + inputLocale + "]");
        }
        // Atomically obtain the current copy of mDictWords;
        final HashMap<String, HashMap<Locale, String>> dictWords = mDictWords;
        // Lowercase the word using the given locale. Note, that dictionary
        // words are lowercased using their locale, and theoretically the
        // lowercasing between two matching locales may differ. For simplicity
        // we ignore that possibility.
        final String lowercased = word.toLowerCase(inputLocale);
        final HashMap<Locale, String> dictLocales = dictWords.get(lowercased);

        if (CollectionUtils.isNullOrEmpty(dictLocales)) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "isValidWord() : No entry for word [" + word + "]");
            }
            return false;
        }

        if (DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "isValidWord() : Found entry for word [" + word + "]");
        }
        // Iterate over the locales this word is in.
        final String inputLocaleString = inputLocale.toString();
        for (final Locale dictLocale : dictLocales.keySet()) {
            final int matchLevel =
                    LocaleUtils.getMatchLevel(dictLocale.toString(), inputLocaleString);
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "isValidWord() : MatchLevel for DictLocale [" + dictLocale
                        + "] and InputLocale [" + inputLocale + "] is " + matchLevel);
            }
            if (LocaleUtils.isMatch(matchLevel)) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "isValidWord() : MatchLevel " + matchLevel + " IS a match");
                }
                return true;
            }
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "isValidWord() : MatchLevel " + matchLevel + " is NOT a match");
            }
        }
        if (DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "isValidWord() : False, since none of the locales matched");
        }
        return false;
    }

    /**
     * Expands the given shortcut for the given locale.
     *
     * The most specific definition wins: the country-specific locale is consulted first, then
     * the language-only locale, then the shortcuts that apply to any locale.
     *
     * @param shortcut the shortcut to expand
     * @param inputLocale the locale in which to expand the shortcut
     * @return expanded shortcut iff the word is a shortcut in the dictionary.
     */
    @Nullable
    public String expandShortcut(
            @Nonnull final String shortcut, @Nonnull final Locale inputLocale) {
        if (DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "expandShortcut() : Shortcut [" + shortcut + "] for [" + inputLocale + "]");
        }

        // Atomically obtain the current copy of mShortcuts;
        final HashMap<Locale, HashMap<String, String>> shortcutsPerLocale = mShortcutsPerLocale;

        // Exit as early as possible. Most users don't use shortcuts.
        if (CollectionUtils.isNullOrEmpty(shortcutsPerLocale)) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "expandShortcut() : User has no shortcuts");
            }
            return null;
        }

        if (!TextUtils.isEmpty(inputLocale.getCountry())) {
            // First look for the country-specific shortcut: en_US, en_UK, fr_FR, etc.
            final String expansionForCountry = expandShortcut(
                    shortcutsPerLocale, shortcut, inputLocale);
            if (!TextUtils.isEmpty(expansionForCountry)) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "expandShortcut() : Country expansion is ["
                            + expansionForCountry + "]");
                }
                return expansionForCountry;
            }
        }

        // Next look for the language-specific shortcut: en, fr, etc.
        final Locale languageOnlyLocale =
                LocaleUtils.constructLocaleFromString(inputLocale.getLanguage());
        final String expansionForLanguage = expandShortcut(
                shortcutsPerLocale, shortcut, languageOnlyLocale);
        if (!TextUtils.isEmpty(expansionForLanguage)) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "expandShortcut() : Language expansion is ["
                        + expansionForLanguage + "]");
            }
            return expansionForLanguage;
        }

        // If all else fails, look for a global shortcut.
        final String expansionForGlobal = expandShortcut(shortcutsPerLocale, shortcut, ANY_LOCALE);
        if (!TextUtils.isEmpty(expansionForGlobal) && DebugFlags.DEBUG_ENABLED) {
            Log.d(mTag, "expandShortcut() : Global expansion is [" + expansionForGlobal + "]");
        }
        return expansionForGlobal;
    }

    /**
     * Looks up a shortcut in the table of exactly one locale, without any fallback.
     *
     * @param shortcutsPerLocale the per-locale shortcut tables, possibly null or empty
     * @param shortcut the shortcut to expand
     * @param locale the exact locale key to look under
     * @return the expansion, or null if the locale has no such shortcut
     */
    @Nullable
    private String expandShortcut(
            @Nullable final HashMap<Locale, HashMap<String, String>> shortcutsPerLocale,
            @Nonnull final String shortcut,
            @Nonnull final Locale locale) {
        if (CollectionUtils.isNullOrEmpty(shortcutsPerLocale)) {
            return null;
        }
        final HashMap<String, String> localeShortcuts = shortcutsPerLocale.get(locale);
        if (CollectionUtils.isNullOrEmpty(localeShortcuts)) {
            return null;
        }
        return localeShortcuts.get(shortcut);
    }

    /**
     * Loads the personal dictionary in the current thread.
     *
     * Only one reload can happen at a time. If already running, will exit quickly.
     */
    private void loadPersonalDictionary() {
        // Bail out if already in the process of loading.
        if (!mIsLoading.compareAndSet(false, true)) {
            Log.i(mTag, "loadPersonalDictionary() : Already Loading (exit)");
            return;
        }
        Log.i(mTag, "loadPersonalDictionary() : Start Loading");

        final int numWords;
        int numShortcuts = 0;
        try {
            final HashMap<String, HashMap<Locale, String>> dictWords = new HashMap<>();
            final HashMap<Locale, HashMap<String, String>> shortcutsPerLocale = new HashMap<>();

            // Load the dictionary. Items are returned in the default sort order (by frequency).
            Cursor cursor = null;
            try {
                cursor = mResolver.query(UserDictionary.Words.CONTENT_URI,
                        null, null, null, UserDictionary.Words.DEFAULT_SORT_ORDER);
                if (null == cursor || cursor.getCount() < 1) {
                    Log.i(mTag, "loadPersonalDictionary() : Empty");
                } else {
                    readEntries(cursor, dictWords, shortcutsPerLocale);
                }
            } finally {
                // Closed explicitly rather than with try-with-resources, since Cursor is not
                // Closeable on the pre-API16 platforms this class supports.
                if (cursor != null) {
                    cursor.close();
                }
            }

            final List<DictionaryStats> stats = new ArrayList<>();
            stats.add(new DictionaryStats(ANY_LOCALE, Dictionary.TYPE_USER, dictWords.size()));
            for (final HashMap<String, String> shortcuts : shortcutsPerLocale.values()) {
                numShortcuts += shortcuts.size();
            }
            stats.add(new DictionaryStats(
                    ANY_LOCALE, Dictionary.TYPE_USER_SHORTCUT, numShortcuts));
            mDictionaryStats = stats;

            // Atomically replace the copy of mDictWords and mShortcuts.
            mDictWords = dictWords;
            mShortcutsPerLocale = shortcutsPerLocale;
            numWords = dictWords.size();
        } finally {
            // Allow other calls to loadPersonalDictionary to execute now. Done in a finally
            // block so that a failed load does not block every later reload.
            mIsLoading.set(false);
        }

        Log.i(mTag, "loadPersonalDictionary() : Loaded " + numWords
                + " words and " + numShortcuts + " shortcuts");

        notifyListeners();
    }

    /**
     * Reads rows from the cursor into the given word and shortcut maps, stopping once
     * {@link #MAX_NUM_ENTRIES} distinct words have been collected.
     *
     * @param cursor a non-null cursor over the personal dictionary
     * @param dictWords receives lowercased word -> (locale -> word as entered)
     * @param shortcutsPerLocale receives locale -> (shortcut -> word as entered)
     */
    private void readEntries(
            @Nonnull final Cursor cursor,
            @Nonnull final HashMap<String, HashMap<Locale, String>> dictWords,
            @Nonnull final HashMap<Locale, HashMap<String, String>> shortcutsPerLocale) {
        // Column positions are the same for every row, so resolve them once.
        // If there is no column for locale, no entry can be used. An empty
        // locale on the other hand will not be skipped.
        final int dictLocaleIndex = cursor.getColumnIndex(UserDictionary.Words.LOCALE);
        if (dictLocaleIndex < 0) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "loadPersonalDictionary() : Entry without LOCALE, skipping");
            }
            return;
        }
        // If there is no column for word, no entry can be used.
        final int dictWordIndex = cursor.getColumnIndex(UserDictionary.Words.WORD);
        if (dictWordIndex < 0) {
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "loadPersonalDictionary() : Entry without WORD, skipping");
            }
            return;
        }
        final int shortcutIndex = cursor.getColumnIndex(UserDictionary.Words.SHORTCUT);

        // Iterate over the entries in the personal dictionary. Note, that iteration is in
        // descending frequency by default.
        while (dictWords.size() < MAX_NUM_ENTRIES && cursor.moveToNext()) {
            // If the word is null, skip this entry.
            final String rawDictWord = cursor.getString(dictWordIndex);
            if (null == rawDictWord) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "loadPersonalDictionary() : Null word");
                }
                continue;
            }
            // If the locale is null, that's interpreted to mean all locales. Note, the special
            // zz locale for an Alphabet (QWERTY) layout will not match any actual language.
            String localeString = cursor.getString(dictLocaleIndex);
            if (null == localeString) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "loadPersonalDictionary() : Null locale for word ["
                            + rawDictWord + "], assuming all locales");
                }
                // For purposes of LocaleUtils, an empty locale matches everything.
                localeString = "";
            }
            final Locale dictLocale = LocaleUtils.constructLocaleFromString(localeString);
            // Lowercase the word before storing it.
            final String dictWord = rawDictWord.toLowerCase(dictLocale);
            if (DebugFlags.DEBUG_ENABLED) {
                Log.d(mTag, "loadPersonalDictionary() : Adding word [" + dictWord
                        + "] for locale " + dictLocale + "with value" + rawDictWord);
            }
            // Check if there is an existing entry for this word.
            HashMap<Locale, String> dictLocales = dictWords.get(dictWord);
            if (CollectionUtils.isNullOrEmpty(dictLocales)) {
                // If there is no entry for this word, create one.
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "loadPersonalDictionary() : Word [" + dictWord
                            + "] not seen for other locales, creating new entry");
                }
                dictLocales = new HashMap<>();
                dictWords.put(dictWord, dictLocales);
            }
            // Append the locale to the list of locales this word is in.
            dictLocales.put(dictLocale, rawDictWord);

            // If there is no column for a shortcut, we're done.
            if (shortcutIndex < 0) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "loadPersonalDictionary() : Entry without SHORTCUT, done");
                }
                continue;
            }
            // If the shortcut is null, we're done.
            final String shortcut = cursor.getString(shortcutIndex);
            if (shortcut == null) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.d(mTag, "loadPersonalDictionary() : Null shortcut");
                }
                continue;
            }
            // Else, save the shortcut.
            HashMap<String, String> localeShortcuts = shortcutsPerLocale.get(dictLocale);
            if (localeShortcuts == null) {
                localeShortcuts = new HashMap<>();
                shortcutsPerLocale.put(dictLocale, localeShortcuts);
            }
            // Map to the raw input, which might be capitalized.
            // This lets the user create a shortcut from "gm" to "General Motors".
            localeShortcuts.put(shortcut, rawDictWord);
        }
    }
}