/* * Copyright (C) 2010 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin; import android.content.Context; import android.content.SharedPreferences; import android.os.AsyncTask; import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils; import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.lang.ref.SoftReference; import java.util.ArrayList; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReentrantLock; /** * Locally gathers stats about the words user types and various other signals like auto-correction * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. */ public final class UserHistoryDictionary extends ExpandableDictionary { private static final String TAG = UserHistoryDictionary.class.getSimpleName(); private static final String NAME = UserHistoryDictionary.class.getSimpleName(); public static final boolean DBG_SAVE_RESTORE = false; public static final boolean DBG_STRESS_TEST = false; public static final boolean DBG_ALWAYS_WRITE = false; public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; private static final FormatOptions VERSION3 = new FormatOptions(3, true /* supportsDynamicUpdate */); /** Any pair being typed or picked */ private static final int FREQUENCY_FOR_TYPED = 2; /** Maximum number of pairs. Pruning will start when databases goes above this number. */ public static final int MAX_HISTORY_BIGRAMS = 10000; /** * When it hits maximum bigram pair, it will delete until you are left with * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. * Do not keep this number small to avoid deleting too often. */ public static final int DELETE_HISTORY_BIGRAMS = 1000; /** Locale for which this user history dictionary is storing words */ private final String mLocale; private final UserHistoryDictionaryBigramList mBigramList = new UserHistoryDictionaryBigramList(); private final ReentrantLock mBigramListLock = new ReentrantLock(); private final SharedPreferences mPrefs; // Should always be false except when we use this class for test @UsedForTesting boolean isTest = false; private static final ConcurrentHashMap> sLangDictCache = CollectionUtils.newConcurrentHashMap(); public static synchronized UserHistoryDictionary getInstance( final Context context, final String locale, final SharedPreferences sp) { if (sLangDictCache.containsKey(locale)) { final SoftReference ref = sLangDictCache.get(locale); final UserHistoryDictionary dict = ref == null ? null : ref.get(); if (dict != null) { if (PROFILE_SAVE_RESTORE) { Log.w(TAG, "Use cached UserHistoryDictionary for " + locale); } return dict; } } final UserHistoryDictionary dict = new UserHistoryDictionary(context, locale, sp); sLangDictCache.put(locale, new SoftReference(dict)); return dict; } private UserHistoryDictionary(final Context context, final String locale, final SharedPreferences sp) { super(context, Dictionary.TYPE_USER_HISTORY); mLocale = locale; mPrefs = sp; if (mLocale != null && mLocale.length() > 1) { loadDictionary(); } } @Override public void close() { flushPendingWrites(); // Don't close the database as locale changes will require it to be reopened anyway // Also, the database is written to somewhat frequently, so it needs to be kept alive // throughout the life of the process. // mOpenHelper.close(); // Ignore close because we cache UserHistoryDictionary for each language. See getInstance() // above. // super.close(); } @Override protected ArrayList getWordsInner(final WordComposer composer, final String prevWord, final ProximityInfo proximityInfo) { // Inhibit suggestions (not predictions) for user history for now. Removing this method // is enough to use it through the standard ExpandableDictionary way. return null; } /** * Return whether the passed charsequence is in the dictionary. */ @Override public synchronized boolean isValidWord(final String word) { // TODO: figure out what is the correct thing to do here. return false; } /** * Pair will be added to the user history dictionary. * * The first word may be null. That means we don't know the context, in other words, * it's only a unigram. The first word may also be an empty string : this means start * context, as in beginning of a sentence for example. * The second word may not be null (a NullPointerException would be thrown). */ public int addToUserHistory(final String word1, final String word2, final boolean isValid) { if (word2.length() >= Constants.Dictionary.MAX_WORD_LENGTH || (word1 != null && word1.length() >= Constants.Dictionary.MAX_WORD_LENGTH)) { return -1; } if (mBigramListLock.tryLock()) { try { super.addWord( word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED); // Do not insert a word as a bigram of itself if (word2.equals(word1)) { return 0; } final int freq; if (null == word1) { freq = FREQUENCY_FOR_TYPED; } else { freq = super.setBigramAndGetFrequency( word1, word2, new ForgettingCurveParams(isValid)); } mBigramList.addBigram(word1, word2); return freq; } finally { mBigramListLock.unlock(); } } return -1; } public boolean cancelAddingUserHistory(final String word1, final String word2) { if (mBigramListLock.tryLock()) { try { if (mBigramList.removeBigram(word1, word2)) { return super.removeBigram(word1, word2); } } finally { mBigramListLock.unlock(); } } return false; } /** * Schedules a background thread to write any pending words to the database. */ private void flushPendingWrites() { // Create a background thread to write the pending entries new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute(); } @Override public void loadDictionaryAsync() { // This must be run on non-main thread mBigramListLock.lock(); try { loadDictionaryAsyncLocked(); } finally { mBigramListLock.unlock(); } } private int profTotal; private void loadDictionaryAsyncLocked() { if (DBG_STRESS_TEST) { try { Log.w(TAG, "Start stress in loading: " + mLocale); Thread.sleep(15000); Log.w(TAG, "End stress in loading"); } catch (InterruptedException e) { } } final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale); final boolean initializing = last == 0; final long now = System.currentTimeMillis(); profTotal = 0; final String fileName = NAME + "." + mLocale + ".dict"; final ExpandableDictionary dictionary = this; final OnAddWordListener listener = new OnAddWordListener() { @Override public void setUnigram(final String word, final String shortcutTarget, final int frequency) { profTotal++; if (DBG_SAVE_RESTORE) { Log.d(TAG, "load unigram: " + word + "," + frequency); } dictionary.addWord(word, shortcutTarget, frequency); mBigramList.addBigram(null, word, (byte)frequency); } @Override public void setBigram(final String word1, final String word2, final int frequency) { if (word1.length() < Constants.Dictionary.MAX_WORD_LENGTH && word2.length() < Constants.Dictionary.MAX_WORD_LENGTH) { profTotal++; if (DBG_SAVE_RESTORE) { Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency); } dictionary.setBigramAndGetFrequency( word1, word2, initializing ? new ForgettingCurveParams(true) : new ForgettingCurveParams(frequency, now, last)); } mBigramList.addBigram(word1, word2, (byte)frequency); } }; // Load the dictionary from binary file FileInputStream inStream = null; try { final File file = new File(getContext().getFilesDir(), fileName); final byte[] buffer = new byte[(int)file.length()]; inStream = new FileInputStream(file); inStream.read(buffer); UserHistoryDictIOUtils.readDictionaryBinary( new ByteArrayWrapper(buffer), listener); } catch (FileNotFoundException e) { // This is an expected condition: we don't have a user history dictionary for this // language yet. It will be created sometime later. } catch (IOException e) { Log.e(TAG, "IOException on opening a bytebuffer", e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { // do nothing } } if (PROFILE_SAVE_RESTORE) { final long diff = System.currentTimeMillis() - now; Log.d(TAG, "PROF: Load UserHistoryDictionary: " + mLocale + ", " + diff + "ms. load " + profTotal + "entries."); } } } /** * Async task to write pending words to the binarydicts. */ private static final class UpdateBinaryTask extends AsyncTask implements BigramDictionaryInterface { private final UserHistoryDictionaryBigramList mBigramList; private final boolean mAddLevel0Bigrams; private final String mLocale; private final UserHistoryDictionary mUserHistoryDictionary; private final SharedPreferences mPrefs; private final Context mContext; public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites, final String locale, final UserHistoryDictionary dict, final SharedPreferences prefs, final Context context) { mBigramList = pendingWrites; mLocale = locale; mUserHistoryDictionary = dict; mPrefs = prefs; mContext = context; mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS; } @Override protected Void doInBackground(final Void... v) { if (mUserHistoryDictionary.isTest) { // If isTest == true, wait until the lock is released. mUserHistoryDictionary.mBigramListLock.lock(); try { doWriteTaskLocked(); } finally { mUserHistoryDictionary.mBigramListLock.unlock(); } } else if (mUserHistoryDictionary.mBigramListLock.tryLock()) { doWriteTaskLocked(); } return null; } private void doWriteTaskLocked() { if (DBG_STRESS_TEST) { try { Log.w(TAG, "Start stress in closing: " + mLocale); Thread.sleep(15000); Log.w(TAG, "End stress in closing"); } catch (InterruptedException e) { Log.e(TAG, "In stress test", e); } } final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; final String fileName = NAME + "." + mLocale + ".dict"; final File file = new File(mContext.getFilesDir(), fileName); FileOutputStream out = null; try { out = new FileOutputStream(file); UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3); out.flush(); out.close(); } catch (IOException e) { Log.e(TAG, "IO Exception while writing file", e); } finally { if (out != null) { try { out.close(); } catch (IOException e) { // ignore } } } // Save the timestamp after we finish writing the binary dictionary. Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale); if (PROFILE_SAVE_RESTORE) { final long diff = System.currentTimeMillis() - now; Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms."); } } @Override public int getFrequency(final String word1, final String word2) { final int freq; if (word1 == null) { // unigram freq = FREQUENCY_FOR_TYPED; final byte prevFc = mBigramList.getBigrams(word1).get(word2); } else { // bigram final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); if (nw != null) { final ForgettingCurveParams fcp = nw.getFcParams(); final byte prevFc = mBigramList.getBigrams(word1).get(word2); final byte fc = fcp.getFc(); final boolean isValid = fcp.isValid(); if (prevFc > 0 && prevFc == fc) { freq = fc & 0xFF; } else if (UserHistoryForgettingCurveUtils. needsToSave(fc, isValid, mAddLevel0Bigrams)) { freq = fc & 0xFF; } else { // Delete this entry freq = -1; } } else { // Delete this entry freq = -1; } } return freq; } } @UsedForTesting void forceAddWordForTest(final String word1, final String word2, final boolean isValid) { mBigramListLock.lock(); try { addToUserHistory(word1, word2, isValid); } finally { mBigramListLock.unlock(); } } }