From 87d06afc66db68f0b30b36593095511314793517 Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Wed, 31 Jul 2013 14:42:50 +0900 Subject: [PATCH] Refactor on the user history dictionary Bug: 9429906 Bug: 4192129 Change-Id: I1a2bfe96e18119d30a72290aa48746f77a021018 --- .../android/inputmethod/latin/LatinIME.java | 15 +- .../android/inputmethod/latin/Suggest.java | 8 +- .../PersonalizationDictionaryHelper.java | 14 +- ...rsonalizationDictionaryUpdateListener.java | 2 +- .../PersonalizationPredictionDictionary.java | 370 +++++++++++++++- .../UserHistoryDictionary.java | 397 ------------------ .../UserHistoryDictionaryBigramList.java | 4 +- .../UserHistoryPredictionDictionary.java | 33 ++ .../UserHistoryDictionaryTests.java | 10 +- 9 files changed, 424 insertions(+), 429 deletions(-) delete mode 100644 java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java create mode 100644 java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 1211ea5ad..44185e896 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -76,7 +76,7 @@ import com.android.inputmethod.keyboard.MainKeyboardView; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.define.ProductionFlag; import com.android.inputmethod.latin.personalization.PersonalizationDictionaryHelper; -import com.android.inputmethod.latin.personalization.UserHistoryDictionary; +import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary; import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.settings.SettingsActivity; import com.android.inputmethod.latin.settings.SettingsValues; @@ -169,7 +169,7 @@ public class LatinIME extends InputMethodService implements 
KeyboardActionListen private boolean mIsMainDictionaryAvailable; private UserBinaryDictionary mUserDictionary; - private UserHistoryDictionary mUserHistoryDictionary; + private UserHistoryPredictionDictionary mUserHistoryPredictionDictionary; private boolean mIsUserDictionaryAvailable; private LastComposedWord mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD; @@ -565,9 +565,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen resetContactsDictionary(oldContactsDictionary); final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); - mUserHistoryDictionary = - PersonalizationDictionaryHelper.getUserHistoryDictionary(this, localeStr, prefs); - mSuggest.setUserHistoryDictionary(mUserHistoryDictionary); + mUserHistoryPredictionDictionary = PersonalizationDictionaryHelper + .getUserHistoryPredictionDictionary(this, localeStr, prefs); + mSuggest.setUserHistoryPredictionDictionary(mUserHistoryPredictionDictionary); } /** @@ -2507,7 +2507,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen if (!currentSettings.mCorrectionEnabled) return null; final Suggest suggest = mSuggest; - final UserHistoryDictionary userHistoryDictionary = mUserHistoryDictionary; + final UserHistoryPredictionDictionary userHistoryDictionary = + mUserHistoryPredictionDictionary; if (suggest == null || userHistoryDictionary == null) { // Avoid concurrent issue return null; @@ -2657,7 +2658,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen } mConnection.deleteSurroundingText(deleteLength, 0); if (!TextUtils.isEmpty(previousWord) && !TextUtils.isEmpty(committedWord)) { - mUserHistoryDictionary.cancelAddingUserHistory(previousWord, committedWord); + mUserHistoryPredictionDictionary.cancelAddingUserHistory(previousWord, committedWord); } mConnection.commitText(originallyTypedWord + mLastComposedWord.mSeparatorString, 1); if (mSettings.isInternal()) { diff --git 
a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index 647c6f6e1..6b016675a 100644 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -22,7 +22,7 @@ import android.text.TextUtils; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.personalization.UserHistoryDictionary; +import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary; import com.android.inputmethod.latin.utils.AutoCorrectionUtils; import com.android.inputmethod.latin.utils.BoundedTreeSet; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -168,8 +168,10 @@ public final class Suggest { addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); } - public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) { - addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); + public void setUserHistoryPredictionDictionary( + final UserHistoryPredictionDictionary userHistoryPredictionDictionary) { + addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, + userHistoryPredictionDictionary); } public void setAutoCorrectionThreshold(float threshold) { diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryHelper.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryHelper.java index e09e834bf..f5dae99ef 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryHelper.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryHelper.java @@ -29,15 +29,16 @@ public class PersonalizationDictionaryHelper { private static final String TAG = 
PersonalizationDictionaryHelper.class.getSimpleName(); private static final boolean DEBUG = false; - private static final ConcurrentHashMap> + private static final ConcurrentHashMap> sLangDictCache = CollectionUtils.newConcurrentHashMap(); - public static UserHistoryDictionary getUserHistoryDictionary( + public static UserHistoryPredictionDictionary getUserHistoryPredictionDictionary( final Context context, final String locale, final SharedPreferences sp) { synchronized (sLangDictCache) { if (sLangDictCache.containsKey(locale)) { - final SoftReference ref = sLangDictCache.get(locale); - final UserHistoryDictionary dict = ref == null ? null : ref.get(); + final SoftReference ref = + sLangDictCache.get(locale); + final UserHistoryPredictionDictionary dict = ref == null ? null : ref.get(); if (dict != null) { if (DEBUG) { Log.w(TAG, "Use cached UserHistoryDictionary for " + locale); @@ -45,8 +46,9 @@ public class PersonalizationDictionaryHelper { return dict; } } - final UserHistoryDictionary dict = new UserHistoryDictionary(context, locale, sp); - sLangDictCache.put(locale, new SoftReference(dict)); + final UserHistoryPredictionDictionary dict = + new UserHistoryPredictionDictionary(context, locale, sp); + sLangDictCache.put(locale, new SoftReference(dict)); return dict; } } diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateListener.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateListener.java index 2ec0dc00c..c78e5a95b 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateListener.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateListener.java @@ -16,6 +16,6 @@ package com.android.inputmethod.latin.personalization; -public class PersonalizationDictionaryUpdateListener { +public interface PersonalizationDictionaryUpdateListener { // TODO: Implement } diff --git 
a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java index 7bce97978..083ce5893 100644 --- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java +++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationPredictionDictionary.java @@ -16,11 +16,34 @@ package com.android.inputmethod.latin.personalization; -import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.ExpandableDictionary; - import android.content.Context; import android.content.SharedPreferences; +import android.os.AsyncTask; +import android.util.Log; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.keyboard.ProximityInfo; +import com.android.inputmethod.latin.Constants; +import com.android.inputmethod.latin.ExpandableDictionary; +import com.android.inputmethod.latin.LatinImeLogger; +import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.WordComposer; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.settings.Settings; +import com.android.inputmethod.latin.utils.ByteArrayWrapper; +import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; +import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; +import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; +import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils; +import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import 
java.util.concurrent.locks.ReentrantLock; /** * This class is a dictionary for the personalized prediction language model implemented in Java. @@ -30,17 +53,348 @@ public class PersonalizationPredictionDictionary extends ExpandableDictionary { // TODO: Implement } + private static final String TAG = PersonalizationPredictionDictionary.class.getSimpleName(); + private static final String NAME = PersonalizationPredictionDictionary.class.getSimpleName(); + public static final boolean DBG_SAVE_RESTORE = false; + public static final boolean DBG_STRESS_TEST = false; + public static final boolean DBG_ALWAYS_WRITE = false; + public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; + + private static final FormatOptions VERSION3 = new FormatOptions(3, + true /* supportsDynamicUpdate */); + + /** Any pair being typed or picked */ + private static final int FREQUENCY_FOR_TYPED = 2; + + /** Maximum number of pairs. Pruning will start when the database goes above this number. */ + public static final int MAX_HISTORY_BIGRAMS = 10000; + + /** + * When it hits the maximum number of bigram pairs, it will delete until you are left with + * only (MAX_HISTORY_BIGRAMS - DELETE_HISTORY_BIGRAMS) pairs. + * Do not keep this number small to avoid deleting too often. 
+ */ + public static final int DELETE_HISTORY_BIGRAMS = 1000; + /** Locale for which this user history dictionary is storing words */ private final String mLocale; + + private final UserHistoryDictionaryBigramList mBigramList = + new UserHistoryDictionaryBigramList(); + private final ReentrantLock mBigramListLock = new ReentrantLock(); private final SharedPreferences mPrefs; - // Singleton - private PersonalizationPredictionDictionary(final Context context, final String locale, - final SharedPreferences sp) { - super(context, Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA); + // Should always be false except when we use this class for test + @UsedForTesting boolean isTest = false; + + /* package */ PersonalizationPredictionDictionary(final Context context, final String locale, + final SharedPreferences sp, final String dictionaryType) { + super(context, dictionaryType); mLocale = locale; mPrefs = sp; + if (mLocale != null && mLocale.length() > 1) { + loadDictionary(); + } } - // TODO: Implement + @Override + public void close() { + flushPendingWrites(); + // Don't close the database as locale changes will require it to be reopened anyway + // Also, the database is written to somewhat frequently, so it needs to be kept alive + // throughout the life of the process. + // mOpenHelper.close(); + // Ignore close because we cache PersonalizationPredictionDictionary for each language. + // See PersonalizationDictionaryHelper. + // super.close(); + } + + @Override + protected ArrayList getWordsInner(final WordComposer composer, + final String prevWord, final ProximityInfo proximityInfo) { + // Inhibit suggestions (not predictions) for user history for now. Removing this method + // is enough to use it through the standard ExpandableDictionary way. + return null; + } + + /** + * Return whether the passed word is in the dictionary. + */ + @Override + public synchronized boolean isValidWord(final String word) { + // TODO: figure out what is the correct thing to do here. 
+ return false; + } + + /** + * Pair will be added to the user history dictionary. + * + * The first word may be null. That means we don't know the context, in other words, + * it's only a unigram. The first word may also be an empty string : this means start + * context, as in beginning of a sentence for example. + * The second word may not be null (a NullPointerException would be thrown). + */ + public int addToUserHistory(final String word1, final String word2, final boolean isValid) { + if (word2.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH || + (word1 != null && word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) { + return -1; + } + if (mBigramListLock.tryLock()) { + try { + super.addWord( + word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); + mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED); + // Do not insert a word as a bigram of itself + if (word2.equals(word1)) { + return 0; + } + final int freq; + if (null == word1) { + freq = FREQUENCY_FOR_TYPED; + } else { + freq = super.setBigramAndGetFrequency( + word1, word2, new ForgettingCurveParams(isValid)); + } + mBigramList.addBigram(word1, word2); + return freq; + } finally { + mBigramListLock.unlock(); + } + } + return -1; + } + + public boolean cancelAddingUserHistory(final String word1, final String word2) { + if (mBigramListLock.tryLock()) { + try { + if (mBigramList.removeBigram(word1, word2)) { + return super.removeBigram(word1, word2); + } + } finally { + mBigramListLock.unlock(); + } + } + return false; + } + + /** + * Schedules a background thread to write any pending words to the database. 
+ */ + private void flushPendingWrites() { + // Create a background thread to write the pending entries + new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute(); + } + + @Override + public void loadDictionaryAsync() { + // This must be run on non-main thread + mBigramListLock.lock(); + try { + loadDictionaryAsyncLocked(); + } finally { + mBigramListLock.unlock(); + } + } + + private int profTotal; + + private void loadDictionaryAsyncLocked() { + if (DBG_STRESS_TEST) { + try { + Log.w(TAG, "Start stress in loading: " + mLocale); + Thread.sleep(15000); + Log.w(TAG, "End stress in loading"); + } catch (InterruptedException e) { + } + } + final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale); + final boolean initializing = last == 0; + final long now = System.currentTimeMillis(); + profTotal = 0; + final String fileName = NAME + "." + mLocale + ".dict"; + final ExpandableDictionary dictionary = this; + final OnAddWordListener listener = new OnAddWordListener() { + @Override + public void setUnigram(final String word, final String shortcutTarget, + final int frequency) { + profTotal++; + if (DBG_SAVE_RESTORE) { + Log.d(TAG, "load unigram: " + word + "," + frequency); + } + dictionary.addWord(word, shortcutTarget, frequency); + mBigramList.addBigram(null, word, (byte)frequency); + } + + @Override + public void setBigram(final String word1, final String word2, final int frequency) { + if (word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH + && word2.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) { + profTotal++; + if (DBG_SAVE_RESTORE) { + Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency); + } + dictionary.setBigramAndGetFrequency( + word1, word2, initializing ? 
new ForgettingCurveParams(true) + : new ForgettingCurveParams(frequency, now, last)); + } + mBigramList.addBigram(word1, word2, (byte)frequency); + } + }; + + // Load the dictionary from binary file + FileInputStream inStream = null; + try { + final File file = new File(getContext().getFilesDir(), fileName); + final byte[] buffer = new byte[(int)file.length()]; + inStream = new FileInputStream(file); + inStream.read(buffer); + UserHistoryDictIOUtils.readDictionaryBinary( + new ByteArrayWrapper(buffer), listener); + } catch (FileNotFoundException e) { + // This is an expected condition: we don't have a user history dictionary for this + // language yet. It will be created sometime later. + } catch (IOException e) { + Log.e(TAG, "IOException on opening a bytebuffer", e); + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + if (PROFILE_SAVE_RESTORE) { + final long diff = System.currentTimeMillis() - now; + Log.d(TAG, "PROF: Load UserHistoryDictionary: " + + mLocale + ", " + diff + "ms. load " + profTotal + "entries."); + } + } + } + + /** + * Async task to write pending words to the binarydicts. 
+ */ + private static final class UpdateBinaryTask extends AsyncTask + implements BigramDictionaryInterface { + private final UserHistoryDictionaryBigramList mBigramList; + private final boolean mAddLevel0Bigrams; + private final String mLocale; + private final PersonalizationPredictionDictionary mPersonalizationPredictionDictionary; + private final SharedPreferences mPrefs; + private final Context mContext; + + public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites, + final String locale, final PersonalizationPredictionDictionary dict, + final SharedPreferences prefs, final Context context) { + mBigramList = pendingWrites; + mLocale = locale; + mPersonalizationPredictionDictionary = dict; + mPrefs = prefs; + mContext = context; + mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS; + } + + @Override + protected Void doInBackground(final Void... v) { + if (mPersonalizationPredictionDictionary.isTest) { + // If isTest == true, wait until the lock is released. + mPersonalizationPredictionDictionary.mBigramListLock.lock(); + try { + doWriteTaskLocked(); + } finally { + mPersonalizationPredictionDictionary.mBigramListLock.unlock(); + } + } else if (mPersonalizationPredictionDictionary.mBigramListLock.tryLock()) { + try { + doWriteTaskLocked(); + } finally { + mPersonalizationPredictionDictionary.mBigramListLock.unlock(); + } + } + return null; + } + + private void doWriteTaskLocked() { + if (DBG_STRESS_TEST) { + try { + Log.w(TAG, "Start stress in closing: " + mLocale); + Thread.sleep(15000); + Log.w(TAG, "End stress in closing"); + } catch (InterruptedException e) { + Log.e(TAG, "In stress test", e); + } + } + + final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; + final String fileName = NAME + "." 
+ mLocale + ".dict"; + final File file = new File(mContext.getFilesDir(), fileName); + FileOutputStream out = null; + + try { + out = new FileOutputStream(file); + UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3); + out.flush(); + out.close(); + } catch (IOException e) { + Log.e(TAG, "IO Exception while writing file", e); + } finally { + if (out != null) { + try { + out.close(); + } catch (IOException e) { + // ignore + } + } + } + + // Save the timestamp after we finish writing the binary dictionary. + Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale); + if (PROFILE_SAVE_RESTORE) { + final long diff = System.currentTimeMillis() - now; + Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms."); + } + } + + @Override + public int getFrequency(final String word1, final String word2) { + final int freq; + if (word1 == null) { // unigram + freq = FREQUENCY_FOR_TYPED; + final byte prevFc = mBigramList.getBigrams(word1).get(word2); + } else { // bigram + final NextWord nw = + mPersonalizationPredictionDictionary.getBigramWord(word1, word2); + if (nw != null) { + final ForgettingCurveParams fcp = nw.getFcParams(); + final byte prevFc = mBigramList.getBigrams(word1).get(word2); + final byte fc = fcp.getFc(); + final boolean isValid = fcp.isValid(); + if (prevFc > 0 && prevFc == fc) { + freq = fc & 0xFF; + } else if (UserHistoryForgettingCurveUtils. 
+ needsToSave(fc, isValid, mAddLevel0Bigrams)) { + freq = fc & 0xFF; + } else { + // Delete this entry + freq = -1; + } + } else { + // Delete this entry + freq = -1; + } + } + return freq; + } + } + + @UsedForTesting + /* package for test */ void forceAddWordForTest( + final String word1, final String word2, final boolean isValid) { + mBigramListLock.lock(); + try { + addToUserHistory(word1, word2, isValid); + } finally { + mBigramListLock.unlock(); + } + } } diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java deleted file mode 100644 index c76dea0bb..000000000 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.android.inputmethod.latin.personalization; - -import android.content.Context; -import android.content.SharedPreferences; -import android.os.AsyncTask; -import android.util.Log; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.keyboard.ProximityInfo; -import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.Dictionary; -import com.android.inputmethod.latin.ExpandableDictionary; -import com.android.inputmethod.latin.LatinImeLogger; -import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.WordComposer; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.settings.Settings; -import com.android.inputmethod.latin.utils.ByteArrayWrapper; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface; -import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils; -import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.locks.ReentrantLock; - -/** - * Locally gathers stats about the words user types and various other signals like auto-correction - * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. 
- */ -public class UserHistoryDictionary extends ExpandableDictionary { - private static final String TAG = UserHistoryDictionary.class.getSimpleName(); - private static final String NAME = UserHistoryDictionary.class.getSimpleName(); - public static final boolean DBG_SAVE_RESTORE = false; - public static final boolean DBG_STRESS_TEST = false; - public static final boolean DBG_ALWAYS_WRITE = false; - public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; - - private static final FormatOptions VERSION3 = new FormatOptions(3, - true /* supportsDynamicUpdate */); - - /** Any pair being typed or picked */ - private static final int FREQUENCY_FOR_TYPED = 2; - - /** Maximum number of pairs. Pruning will start when databases goes above this number. */ - public static final int MAX_HISTORY_BIGRAMS = 10000; - - /** - * When it hits maximum bigram pair, it will delete until you are left with - * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. - * Do not keep this number small to avoid deleting too often. 
- */ - public static final int DELETE_HISTORY_BIGRAMS = 1000; - - /** Locale for which this user history dictionary is storing words */ - private final String mLocale; - - private final UserHistoryDictionaryBigramList mBigramList = - new UserHistoryDictionaryBigramList(); - private final ReentrantLock mBigramListLock = new ReentrantLock(); - private final SharedPreferences mPrefs; - - // Should always be false except when we use this class for test - @UsedForTesting boolean isTest = false; - - /* package */ UserHistoryDictionary(final Context context, final String locale, - final SharedPreferences sp) { - super(context, Dictionary.TYPE_USER_HISTORY); - mLocale = locale; - mPrefs = sp; - if (mLocale != null && mLocale.length() > 1) { - loadDictionary(); - } - } - - @Override - public void close() { - flushPendingWrites(); - // Don't close the database as locale changes will require it to be reopened anyway - // Also, the database is written to somewhat frequently, so it needs to be kept alive - // throughout the life of the process. - // mOpenHelper.close(); - // Ignore close because we cache UserHistoryDictionary for each language. See getInstance() - // above. - // super.close(); - } - - @Override - protected ArrayList getWordsInner(final WordComposer composer, - final String prevWord, final ProximityInfo proximityInfo) { - // Inhibit suggestions (not predictions) for user history for now. Removing this method - // is enough to use it through the standard ExpandableDictionary way. - return null; - } - - /** - * Return whether the passed charsequence is in the dictionary. - */ - @Override - public synchronized boolean isValidWord(final String word) { - // TODO: figure out what is the correct thing to do here. - return false; - } - - /** - * Pair will be added to the user history dictionary. - * - * The first word may be null. That means we don't know the context, in other words, - * it's only a unigram. 
The first word may also be an empty string : this means start - * context, as in beginning of a sentence for example. - * The second word may not be null (a NullPointerException would be thrown). - */ - public int addToUserHistory(final String word1, final String word2, final boolean isValid) { - if (word2.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH || - (word1 != null && word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) { - return -1; - } - if (mBigramListLock.tryLock()) { - try { - super.addWord( - word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); - mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED); - // Do not insert a word as a bigram of itself - if (word2.equals(word1)) { - return 0; - } - final int freq; - if (null == word1) { - freq = FREQUENCY_FOR_TYPED; - } else { - freq = super.setBigramAndGetFrequency( - word1, word2, new ForgettingCurveParams(isValid)); - } - mBigramList.addBigram(word1, word2); - return freq; - } finally { - mBigramListLock.unlock(); - } - } - return -1; - } - - public boolean cancelAddingUserHistory(final String word1, final String word2) { - if (mBigramListLock.tryLock()) { - try { - if (mBigramList.removeBigram(word1, word2)) { - return super.removeBigram(word1, word2); - } - } finally { - mBigramListLock.unlock(); - } - } - return false; - } - - /** - * Schedules a background thread to write any pending words to the database. 
- */ - private void flushPendingWrites() { - // Create a background thread to write the pending entries - new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute(); - } - - @Override - public void loadDictionaryAsync() { - // This must be run on non-main thread - mBigramListLock.lock(); - try { - loadDictionaryAsyncLocked(); - } finally { - mBigramListLock.unlock(); - } - } - - private int profTotal; - - private void loadDictionaryAsyncLocked() { - if (DBG_STRESS_TEST) { - try { - Log.w(TAG, "Start stress in loading: " + mLocale); - Thread.sleep(15000); - Log.w(TAG, "End stress in loading"); - } catch (InterruptedException e) { - } - } - final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale); - final boolean initializing = last == 0; - final long now = System.currentTimeMillis(); - profTotal = 0; - final String fileName = NAME + "." + mLocale + ".dict"; - final ExpandableDictionary dictionary = this; - final OnAddWordListener listener = new OnAddWordListener() { - @Override - public void setUnigram(final String word, final String shortcutTarget, - final int frequency) { - profTotal++; - if (DBG_SAVE_RESTORE) { - Log.d(TAG, "load unigram: " + word + "," + frequency); - } - dictionary.addWord(word, shortcutTarget, frequency); - mBigramList.addBigram(null, word, (byte)frequency); - } - - @Override - public void setBigram(final String word1, final String word2, final int frequency) { - if (word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH - && word2.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) { - profTotal++; - if (DBG_SAVE_RESTORE) { - Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency); - } - dictionary.setBigramAndGetFrequency( - word1, word2, initializing ? 
new ForgettingCurveParams(true) - : new ForgettingCurveParams(frequency, now, last)); - } - mBigramList.addBigram(word1, word2, (byte)frequency); - } - }; - - // Load the dictionary from binary file - FileInputStream inStream = null; - try { - final File file = new File(getContext().getFilesDir(), fileName); - final byte[] buffer = new byte[(int)file.length()]; - inStream = new FileInputStream(file); - inStream.read(buffer); - UserHistoryDictIOUtils.readDictionaryBinary( - new ByteArrayWrapper(buffer), listener); - } catch (FileNotFoundException e) { - // This is an expected condition: we don't have a user history dictionary for this - // language yet. It will be created sometime later. - } catch (IOException e) { - Log.e(TAG, "IOException on opening a bytebuffer", e); - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } - if (PROFILE_SAVE_RESTORE) { - final long diff = System.currentTimeMillis() - now; - Log.d(TAG, "PROF: Load UserHistoryDictionary: " - + mLocale + ", " + diff + "ms. load " + profTotal + "entries."); - } - } - } - - /** - * Async task to write pending words to the binarydicts. 
- */ - private static final class UpdateBinaryTask extends AsyncTask - implements BigramDictionaryInterface { - private final UserHistoryDictionaryBigramList mBigramList; - private final boolean mAddLevel0Bigrams; - private final String mLocale; - private final UserHistoryDictionary mUserHistoryDictionary; - private final SharedPreferences mPrefs; - private final Context mContext; - - public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites, - final String locale, final UserHistoryDictionary dict, - final SharedPreferences prefs, final Context context) { - mBigramList = pendingWrites; - mLocale = locale; - mUserHistoryDictionary = dict; - mPrefs = prefs; - mContext = context; - mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS; - } - - @Override - protected Void doInBackground(final Void... v) { - if (mUserHistoryDictionary.isTest) { - // If isTest == true, wait until the lock is released. - mUserHistoryDictionary.mBigramListLock.lock(); - try { - doWriteTaskLocked(); - } finally { - mUserHistoryDictionary.mBigramListLock.unlock(); - } - } else if (mUserHistoryDictionary.mBigramListLock.tryLock()) { - try { - doWriteTaskLocked(); - } finally { - mUserHistoryDictionary.mBigramListLock.unlock(); - } - } - return null; - } - - private void doWriteTaskLocked() { - if (DBG_STRESS_TEST) { - try { - Log.w(TAG, "Start stress in closing: " + mLocale); - Thread.sleep(15000); - Log.w(TAG, "End stress in closing"); - } catch (InterruptedException e) { - Log.e(TAG, "In stress test", e); - } - } - - final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; - final String fileName = NAME + "." 
+ mLocale + ".dict"; - final File file = new File(mContext.getFilesDir(), fileName); - FileOutputStream out = null; - - try { - out = new FileOutputStream(file); - UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3); - out.flush(); - out.close(); - } catch (IOException e) { - Log.e(TAG, "IO Exception while writing file", e); - } finally { - if (out != null) { - try { - out.close(); - } catch (IOException e) { - // ignore - } - } - } - - // Save the timestamp after we finish writing the binary dictionary. - Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale); - if (PROFILE_SAVE_RESTORE) { - final long diff = System.currentTimeMillis() - now; - Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms."); - } - } - - @Override - public int getFrequency(final String word1, final String word2) { - final int freq; - if (word1 == null) { // unigram - freq = FREQUENCY_FOR_TYPED; - final byte prevFc = mBigramList.getBigrams(word1).get(word2); - } else { // bigram - final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); - if (nw != null) { - final ForgettingCurveParams fcp = nw.getFcParams(); - final byte prevFc = mBigramList.getBigrams(word1).get(word2); - final byte fc = fcp.getFc(); - final boolean isValid = fcp.isValid(); - if (prevFc > 0 && prevFc == fc) { - freq = fc & 0xFF; - } else if (UserHistoryForgettingCurveUtils. 
- needsToSave(fc, isValid, mAddLevel0Bigrams)) { - freq = fc & 0xFF; - } else { - // Delete this entry - freq = -1; - } - } else { - // Delete this entry - freq = -1; - } - } - return freq; - } - } - - @UsedForTesting - /* package for test */ void forceAddWordForTest( - final String word1, final String word2, final boolean isValid) { - mBigramListLock.lock(); - try { - addToUserHistory(word1, word2, isValid); - } finally { - mBigramListLock.unlock(); - } - } -} diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java index b93630a18..f21db25a6 100644 --- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryBigramList.java @@ -53,7 +53,7 @@ public final class UserHistoryDictionaryBigramList { * Called when loaded from the SQL DB. */ public void addBigram(String word1, String word2, byte fcValue) { - if (UserHistoryDictionary.DBG_SAVE_RESTORE) { + if (UserHistoryPredictionDictionary.DBG_SAVE_RESTORE) { Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap map; @@ -73,7 +73,7 @@ public final class UserHistoryDictionaryBigramList { * Called when inserted to the SQL DB. 
*/ public void updateBigram(String word1, String word2, byte fcValue) { - if (UserHistoryDictionary.DBG_SAVE_RESTORE) { + if (UserHistoryPredictionDictionary.DBG_SAVE_RESTORE) { Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue); } final HashMap map; diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java new file mode 100644 index 000000000..38987b6e3 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin.personalization; + +import com.android.inputmethod.latin.Dictionary; + +import android.content.Context; +import android.content.SharedPreferences; + +/** + * Locally gathers stats about the words user types and various other signals like auto-correction + * cancellation or manual picks. This allows the keyboard to adapt to the typist over time. 
+ */ +public class UserHistoryPredictionDictionary extends PersonalizationPredictionDictionary { + /* package */ UserHistoryPredictionDictionary(final Context context, final String locale, + final SharedPreferences sp) { + super(context, locale, sp, Dictionary.TYPE_USER_HISTORY); + } +} diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java index 7af83d078..8f9ef1ddd 100644 --- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java @@ -70,7 +70,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { return new ArrayList(wordSet); } - private void addToDict(final UserHistoryDictionary dict, final List words) { + private void addToDict(final UserHistoryPredictionDictionary dict, final List words) { String prevWord = null; for (String word : words) { dict.forceAddWordForTest(prevWord, word, true); @@ -90,8 +90,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { final String locale = "testRandomWords"; final String fileName = "UserHistoryDictionary." + locale + ".dict"; dictFile = new File(getContext().getFilesDir(), fileName); - final UserHistoryDictionary dict = - PersonalizationDictionaryHelper.getUserHistoryDictionary( + final UserHistoryPredictionDictionary dict = + PersonalizationDictionaryHelper.getUserHistoryPredictionDictionary( getContext(), locale, mPrefs); dict.isTest = true; @@ -142,8 +142,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase { for (int i = 0; i < numberOfLanguageSwitching; i++) { final int index = i % numberOfLanguages; // Switch languages to locales[index]. 
- final UserHistoryDictionary dict = - PersonalizationDictionaryHelper.getUserHistoryDictionary( + final UserHistoryPredictionDictionary dict = + PersonalizationDictionaryHelper.getUserHistoryPredictionDictionary( getContext(), locales[index], mPrefs); final List words = generateWords( numberOfWordsIntertedForEachLanguageSwitch, random);