Merge "Refactor on the user history dictionary"
This commit is contained in:
commit
acb3cc7473
9 changed files with 424 additions and 429 deletions
|
@ -76,7 +76,7 @@ import com.android.inputmethod.keyboard.MainKeyboardView;
|
|||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.define.ProductionFlag;
|
||||
import com.android.inputmethod.latin.personalization.PersonalizationDictionaryHelper;
|
||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
|
||||
import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary;
|
||||
import com.android.inputmethod.latin.settings.Settings;
|
||||
import com.android.inputmethod.latin.settings.SettingsActivity;
|
||||
import com.android.inputmethod.latin.settings.SettingsValues;
|
||||
|
@ -169,7 +169,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
|
||||
private boolean mIsMainDictionaryAvailable;
|
||||
private UserBinaryDictionary mUserDictionary;
|
||||
private UserHistoryDictionary mUserHistoryDictionary;
|
||||
private UserHistoryPredictionDictionary mUserHistoryPredictionDictionary;
|
||||
private boolean mIsUserDictionaryAvailable;
|
||||
|
||||
private LastComposedWord mLastComposedWord = LastComposedWord.NOT_A_COMPOSED_WORD;
|
||||
|
@ -565,9 +565,9 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
resetContactsDictionary(oldContactsDictionary);
|
||||
|
||||
final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
mUserHistoryDictionary =
|
||||
PersonalizationDictionaryHelper.getUserHistoryDictionary(this, localeStr, prefs);
|
||||
mSuggest.setUserHistoryDictionary(mUserHistoryDictionary);
|
||||
mUserHistoryPredictionDictionary = PersonalizationDictionaryHelper
|
||||
.getUserHistoryPredictionDictionary(this, localeStr, prefs);
|
||||
mSuggest.setUserHistoryPredictionDictionary(mUserHistoryPredictionDictionary);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2507,7 +2507,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
if (!currentSettings.mCorrectionEnabled) return null;
|
||||
|
||||
final Suggest suggest = mSuggest;
|
||||
final UserHistoryDictionary userHistoryDictionary = mUserHistoryDictionary;
|
||||
final UserHistoryPredictionDictionary userHistoryDictionary =
|
||||
mUserHistoryPredictionDictionary;
|
||||
if (suggest == null || userHistoryDictionary == null) {
|
||||
// Avoid concurrency issues
|
||||
return null;
|
||||
|
@ -2657,7 +2658,7 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
|
|||
}
|
||||
mConnection.deleteSurroundingText(deleteLength, 0);
|
||||
if (!TextUtils.isEmpty(previousWord) && !TextUtils.isEmpty(committedWord)) {
|
||||
mUserHistoryDictionary.cancelAddingUserHistory(previousWord, committedWord);
|
||||
mUserHistoryPredictionDictionary.cancelAddingUserHistory(previousWord, committedWord);
|
||||
}
|
||||
mConnection.commitText(originallyTypedWord + mLastComposedWord.mSeparatorString, 1);
|
||||
if (mSettings.isInternal()) {
|
||||
|
|
|
@ -22,7 +22,7 @@ import android.text.TextUtils;
|
|||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
|
||||
import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary;
|
||||
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
|
||||
import com.android.inputmethod.latin.utils.BoundedTreeSet;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
@ -168,8 +168,10 @@ public final class Suggest {
|
|||
addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
|
||||
}
|
||||
|
||||
public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
|
||||
addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
|
||||
public void setUserHistoryPredictionDictionary(
|
||||
final UserHistoryPredictionDictionary userHistoryPredictionDictionary) {
|
||||
addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY,
|
||||
userHistoryPredictionDictionary);
|
||||
}
|
||||
|
||||
public void setAutoCorrectionThreshold(float threshold) {
|
||||
|
|
|
@ -29,15 +29,16 @@ public class PersonalizationDictionaryHelper {
|
|||
private static final String TAG = PersonalizationDictionaryHelper.class.getSimpleName();
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
private static final ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>>
|
||||
private static final ConcurrentHashMap<String, SoftReference<UserHistoryPredictionDictionary>>
|
||||
sLangDictCache = CollectionUtils.newConcurrentHashMap();
|
||||
|
||||
public static UserHistoryDictionary getUserHistoryDictionary(
|
||||
public static UserHistoryPredictionDictionary getUserHistoryPredictionDictionary(
|
||||
final Context context, final String locale, final SharedPreferences sp) {
|
||||
synchronized (sLangDictCache) {
|
||||
if (sLangDictCache.containsKey(locale)) {
|
||||
final SoftReference<UserHistoryDictionary> ref = sLangDictCache.get(locale);
|
||||
final UserHistoryDictionary dict = ref == null ? null : ref.get();
|
||||
final SoftReference<UserHistoryPredictionDictionary> ref =
|
||||
sLangDictCache.get(locale);
|
||||
final UserHistoryPredictionDictionary dict = ref == null ? null : ref.get();
|
||||
if (dict != null) {
|
||||
if (DEBUG) {
|
||||
Log.w(TAG, "Use cached UserHistoryDictionary for " + locale);
|
||||
|
@ -45,8 +46,9 @@ public class PersonalizationDictionaryHelper {
|
|||
return dict;
|
||||
}
|
||||
}
|
||||
final UserHistoryDictionary dict = new UserHistoryDictionary(context, locale, sp);
|
||||
sLangDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict));
|
||||
final UserHistoryPredictionDictionary dict =
|
||||
new UserHistoryPredictionDictionary(context, locale, sp);
|
||||
sLangDictCache.put(locale, new SoftReference<UserHistoryPredictionDictionary>(dict));
|
||||
return dict;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,6 @@
|
|||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
public class PersonalizationDictionaryUpdateListener {
|
||||
public interface PersonalizationDictionaryUpdateListener {
|
||||
// TODO: Implement
|
||||
}
|
||||
|
|
|
@ -16,11 +16,34 @@
|
|||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
import com.android.inputmethod.latin.ExpandableDictionary;
|
||||
|
||||
import android.content.Context;
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.AsyncTask;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.ExpandableDictionary;
|
||||
import com.android.inputmethod.latin.LatinImeLogger;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.WordComposer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.settings.Settings;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* This class is a dictionary for the personalized prediction language model implemented in Java.
|
||||
|
@ -30,17 +53,348 @@ public class PersonalizationPredictionDictionary extends ExpandableDictionary {
|
|||
// TODO: Implement
|
||||
}
|
||||
|
||||
private static final String TAG = PersonalizationPredictionDictionary.class.getSimpleName();
|
||||
private static final String NAME = PersonalizationPredictionDictionary.class.getSimpleName();
|
||||
public static final boolean DBG_SAVE_RESTORE = false;
|
||||
public static final boolean DBG_STRESS_TEST = false;
|
||||
public static final boolean DBG_ALWAYS_WRITE = false;
|
||||
public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG;
|
||||
|
||||
private static final FormatOptions VERSION3 = new FormatOptions(3,
|
||||
true /* supportsDynamicUpdate */);
|
||||
|
||||
/** Any pair being typed or picked */
|
||||
private static final int FREQUENCY_FOR_TYPED = 2;
|
||||
|
||||
/** Maximum number of pairs. Pruning will start when the database grows beyond this number. */
|
||||
public static final int MAX_HISTORY_BIGRAMS = 10000;
|
||||
|
||||
/**
|
||||
* When the number of bigram pairs hits the maximum, pairs are deleted until
|
||||
* only (MAX_HISTORY_BIGRAMS - DELETE_HISTORY_BIGRAMS) pairs are left.
|
||||
* Do not make this number too small, to avoid deleting too often.
|
||||
*/
|
||||
public static final int DELETE_HISTORY_BIGRAMS = 1000;
|
||||
|
||||
/** Locale for which this user history dictionary is storing words */
|
||||
private final String mLocale;
|
||||
|
||||
private final UserHistoryDictionaryBigramList mBigramList =
|
||||
new UserHistoryDictionaryBigramList();
|
||||
private final ReentrantLock mBigramListLock = new ReentrantLock();
|
||||
private final SharedPreferences mPrefs;
|
||||
|
||||
// Singleton
|
||||
private PersonalizationPredictionDictionary(final Context context, final String locale,
|
||||
final SharedPreferences sp) {
|
||||
super(context, Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA);
|
||||
// Should always be false except when we use this class for test
|
||||
@UsedForTesting boolean isTest = false;
|
||||
|
||||
/* package */ PersonalizationPredictionDictionary(final Context context, final String locale,
|
||||
final SharedPreferences sp, final String dictionaryType) {
|
||||
super(context, dictionaryType);
|
||||
mLocale = locale;
|
||||
mPrefs = sp;
|
||||
if (mLocale != null && mLocale.length() > 1) {
|
||||
loadDictionary();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Implement
|
||||
@Override
|
||||
public void close() {
|
||||
flushPendingWrites();
|
||||
// Don't close the database as locale changes will require it to be reopened anyway
|
||||
// Also, the database is written to somewhat frequently, so it needs to be kept alive
|
||||
// throughout the life of the process.
|
||||
// mOpenHelper.close();
|
||||
// Ignore close because we cache PersonalizationPredictionDictionary for each language.
|
||||
// See getInstance() above.
|
||||
// super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer composer,
|
||||
final String prevWord, final ProximityInfo proximityInfo) {
|
||||
// Inhibit suggestions (not predictions) for user history for now. Removing this method
|
||||
// is enough to use it through the standard ExpandableDictionary way.
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the passed charsequence is in the dictionary.
|
||||
*/
|
||||
@Override
|
||||
public synchronized boolean isValidWord(final String word) {
|
||||
// TODO: figure out what is the correct thing to do here.
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pair will be added to the user history dictionary.
|
||||
*
|
||||
* The first word may be null. That means we don't know the context, in other words,
|
||||
* it's only a unigram. The first word may also be an empty string : this means start
|
||||
* context, as in beginning of a sentence for example.
|
||||
* The second word may not be null (a NullPointerException would be thrown).
|
||||
*/
|
||||
public int addToUserHistory(final String word1, final String word2, final boolean isValid) {
|
||||
if (word2.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH ||
|
||||
(word1 != null && word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
||||
return -1;
|
||||
}
|
||||
if (mBigramListLock.tryLock()) {
|
||||
try {
|
||||
super.addWord(
|
||||
word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
|
||||
mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED);
|
||||
// Do not insert a word as a bigram of itself
|
||||
if (word2.equals(word1)) {
|
||||
return 0;
|
||||
}
|
||||
final int freq;
|
||||
if (null == word1) {
|
||||
freq = FREQUENCY_FOR_TYPED;
|
||||
} else {
|
||||
freq = super.setBigramAndGetFrequency(
|
||||
word1, word2, new ForgettingCurveParams(isValid));
|
||||
}
|
||||
mBigramList.addBigram(word1, word2);
|
||||
return freq;
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public boolean cancelAddingUserHistory(final String word1, final String word2) {
|
||||
if (mBigramListLock.tryLock()) {
|
||||
try {
|
||||
if (mBigramList.removeBigram(word1, word2)) {
|
||||
return super.removeBigram(word1, word2);
|
||||
}
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a background thread to write any pending words to the database.
|
||||
*/
|
||||
private void flushPendingWrites() {
|
||||
// Create a background thread to write the pending entries
|
||||
new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadDictionaryAsync() {
|
||||
// This must be run on non-main thread
|
||||
mBigramListLock.lock();
|
||||
try {
|
||||
loadDictionaryAsyncLocked();
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private int profTotal;
|
||||
|
||||
private void loadDictionaryAsyncLocked() {
|
||||
if (DBG_STRESS_TEST) {
|
||||
try {
|
||||
Log.w(TAG, "Start stress in loading: " + mLocale);
|
||||
Thread.sleep(15000);
|
||||
Log.w(TAG, "End stress in loading");
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale);
|
||||
final boolean initializing = last == 0;
|
||||
final long now = System.currentTimeMillis();
|
||||
profTotal = 0;
|
||||
final String fileName = NAME + "." + mLocale + ".dict";
|
||||
final ExpandableDictionary dictionary = this;
|
||||
final OnAddWordListener listener = new OnAddWordListener() {
|
||||
@Override
|
||||
public void setUnigram(final String word, final String shortcutTarget,
|
||||
final int frequency) {
|
||||
profTotal++;
|
||||
if (DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "load unigram: " + word + "," + frequency);
|
||||
}
|
||||
dictionary.addWord(word, shortcutTarget, frequency);
|
||||
mBigramList.addBigram(null, word, (byte)frequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
||||
if (word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH
|
||||
&& word2.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
profTotal++;
|
||||
if (DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency);
|
||||
}
|
||||
dictionary.setBigramAndGetFrequency(
|
||||
word1, word2, initializing ? new ForgettingCurveParams(true)
|
||||
: new ForgettingCurveParams(frequency, now, last));
|
||||
}
|
||||
mBigramList.addBigram(word1, word2, (byte)frequency);
|
||||
}
|
||||
};
|
||||
|
||||
// Load the dictionary from binary file
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
final File file = new File(getContext().getFilesDir(), fileName);
|
||||
final byte[] buffer = new byte[(int)file.length()];
|
||||
inStream = new FileInputStream(file);
|
||||
inStream.read(buffer);
|
||||
UserHistoryDictIOUtils.readDictionaryBinary(
|
||||
new ByteArrayWrapper(buffer), listener);
|
||||
} catch (FileNotFoundException e) {
|
||||
// This is an expected condition: we don't have a user history dictionary for this
|
||||
// language yet. It will be created sometime later.
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IOException on opening a bytebuffer", e);
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
try {
|
||||
inStream.close();
|
||||
} catch (IOException e) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
if (PROFILE_SAVE_RESTORE) {
|
||||
final long diff = System.currentTimeMillis() - now;
|
||||
Log.d(TAG, "PROF: Load UserHistoryDictionary: "
|
||||
+ mLocale + ", " + diff + "ms. load " + profTotal + "entries.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Async task to write pending words to the binarydicts.
|
||||
*/
|
||||
private static final class UpdateBinaryTask extends AsyncTask<Void, Void, Void>
|
||||
implements BigramDictionaryInterface {
|
||||
private final UserHistoryDictionaryBigramList mBigramList;
|
||||
private final boolean mAddLevel0Bigrams;
|
||||
private final String mLocale;
|
||||
private final PersonalizationPredictionDictionary mPersonalizationPredictionDictionary;
|
||||
private final SharedPreferences mPrefs;
|
||||
private final Context mContext;
|
||||
|
||||
public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites,
|
||||
final String locale, final PersonalizationPredictionDictionary dict,
|
||||
final SharedPreferences prefs, final Context context) {
|
||||
mBigramList = pendingWrites;
|
||||
mLocale = locale;
|
||||
mPersonalizationPredictionDictionary = dict;
|
||||
mPrefs = prefs;
|
||||
mContext = context;
|
||||
mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Void doInBackground(final Void... v) {
|
||||
if (mPersonalizationPredictionDictionary.isTest) {
|
||||
// If isTest == true, wait until the lock is released.
|
||||
mPersonalizationPredictionDictionary.mBigramListLock.lock();
|
||||
try {
|
||||
doWriteTaskLocked();
|
||||
} finally {
|
||||
mPersonalizationPredictionDictionary.mBigramListLock.unlock();
|
||||
}
|
||||
} else if (mPersonalizationPredictionDictionary.mBigramListLock.tryLock()) {
|
||||
try {
|
||||
doWriteTaskLocked();
|
||||
} finally {
|
||||
mPersonalizationPredictionDictionary.mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void doWriteTaskLocked() {
|
||||
if (DBG_STRESS_TEST) {
|
||||
try {
|
||||
Log.w(TAG, "Start stress in closing: " + mLocale);
|
||||
Thread.sleep(15000);
|
||||
Log.w(TAG, "End stress in closing");
|
||||
} catch (InterruptedException e) {
|
||||
Log.e(TAG, "In stress test", e);
|
||||
}
|
||||
}
|
||||
|
||||
final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0;
|
||||
final String fileName = NAME + "." + mLocale + ".dict";
|
||||
final File file = new File(mContext.getFilesDir(), fileName);
|
||||
FileOutputStream out = null;
|
||||
|
||||
try {
|
||||
out = new FileOutputStream(file);
|
||||
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3);
|
||||
out.flush();
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception while writing file", e);
|
||||
} finally {
|
||||
if (out != null) {
|
||||
try {
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the timestamp after we finish writing the binary dictionary.
|
||||
Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale);
|
||||
if (PROFILE_SAVE_RESTORE) {
|
||||
final long diff = System.currentTimeMillis() - now;
|
||||
Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFrequency(final String word1, final String word2) {
|
||||
final int freq;
|
||||
if (word1 == null) { // unigram
|
||||
freq = FREQUENCY_FOR_TYPED;
|
||||
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
|
||||
} else { // bigram
|
||||
final NextWord nw =
|
||||
mPersonalizationPredictionDictionary.getBigramWord(word1, word2);
|
||||
if (nw != null) {
|
||||
final ForgettingCurveParams fcp = nw.getFcParams();
|
||||
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
|
||||
final byte fc = fcp.getFc();
|
||||
final boolean isValid = fcp.isValid();
|
||||
if (prevFc > 0 && prevFc == fc) {
|
||||
freq = fc & 0xFF;
|
||||
} else if (UserHistoryForgettingCurveUtils.
|
||||
needsToSave(fc, isValid, mAddLevel0Bigrams)) {
|
||||
freq = fc & 0xFF;
|
||||
} else {
|
||||
// Delete this entry
|
||||
freq = -1;
|
||||
}
|
||||
} else {
|
||||
// Delete this entry
|
||||
freq = -1;
|
||||
}
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
/* package for test */ void forceAddWordForTest(
|
||||
final String word1, final String word2, final boolean isValid) {
|
||||
mBigramListLock.lock();
|
||||
try {
|
||||
addToUserHistory(word1, word2, isValid);
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,397 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2010 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import android.content.Context;
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.AsyncTask;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
import com.android.inputmethod.latin.ExpandableDictionary;
|
||||
import com.android.inputmethod.latin.LatinImeLogger;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.WordComposer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.settings.Settings;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils;
|
||||
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* Locally gathers stats about the words user types and various other signals like auto-correction
|
||||
* cancellation or manual picks. This allows the keyboard to adapt to the typist over time.
|
||||
*/
|
||||
public class UserHistoryDictionary extends ExpandableDictionary {
|
||||
private static final String TAG = UserHistoryDictionary.class.getSimpleName();
|
||||
private static final String NAME = UserHistoryDictionary.class.getSimpleName();
|
||||
public static final boolean DBG_SAVE_RESTORE = false;
|
||||
public static final boolean DBG_STRESS_TEST = false;
|
||||
public static final boolean DBG_ALWAYS_WRITE = false;
|
||||
public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG;
|
||||
|
||||
private static final FormatOptions VERSION3 = new FormatOptions(3,
|
||||
true /* supportsDynamicUpdate */);
|
||||
|
||||
/** Any pair being typed or picked */
|
||||
private static final int FREQUENCY_FOR_TYPED = 2;
|
||||
|
||||
/** Maximum number of pairs. Pruning will start when the database grows beyond this number. */
|
||||
public static final int MAX_HISTORY_BIGRAMS = 10000;
|
||||
|
||||
/**
|
||||
* When the number of bigram pairs hits the maximum, pairs are deleted until
|
||||
* only (MAX_HISTORY_BIGRAMS - DELETE_HISTORY_BIGRAMS) pairs are left.
|
||||
* Do not make this number too small, to avoid deleting too often.
|
||||
*/
|
||||
public static final int DELETE_HISTORY_BIGRAMS = 1000;
|
||||
|
||||
/** Locale for which this user history dictionary is storing words */
|
||||
private final String mLocale;
|
||||
|
||||
private final UserHistoryDictionaryBigramList mBigramList =
|
||||
new UserHistoryDictionaryBigramList();
|
||||
private final ReentrantLock mBigramListLock = new ReentrantLock();
|
||||
private final SharedPreferences mPrefs;
|
||||
|
||||
// Should always be false except when we use this class for test
|
||||
@UsedForTesting boolean isTest = false;
|
||||
|
||||
/* package */ UserHistoryDictionary(final Context context, final String locale,
|
||||
final SharedPreferences sp) {
|
||||
super(context, Dictionary.TYPE_USER_HISTORY);
|
||||
mLocale = locale;
|
||||
mPrefs = sp;
|
||||
if (mLocale != null && mLocale.length() > 1) {
|
||||
loadDictionary();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
flushPendingWrites();
|
||||
// Don't close the database as locale changes will require it to be reopened anyway
|
||||
// Also, the database is written to somewhat frequently, so it needs to be kept alive
|
||||
// throughout the life of the process.
|
||||
// mOpenHelper.close();
|
||||
// Ignore close because we cache UserHistoryDictionary for each language. See getInstance()
|
||||
// above.
|
||||
// super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer composer,
|
||||
final String prevWord, final ProximityInfo proximityInfo) {
|
||||
// Inhibit suggestions (not predictions) for user history for now. Removing this method
|
||||
// is enough to use it through the standard ExpandableDictionary way.
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the passed charsequence is in the dictionary.
|
||||
*/
|
||||
@Override
|
||||
public synchronized boolean isValidWord(final String word) {
|
||||
// TODO: figure out what is the correct thing to do here.
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pair will be added to the user history dictionary.
|
||||
*
|
||||
* The first word may be null. That means we don't know the context, in other words,
|
||||
* it's only a unigram. The first word may also be an empty string : this means start
|
||||
* context, as in beginning of a sentence for example.
|
||||
* The second word may not be null (a NullPointerException would be thrown).
|
||||
*/
|
||||
public int addToUserHistory(final String word1, final String word2, final boolean isValid) {
|
||||
if (word2.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH ||
|
||||
(word1 != null && word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
||||
return -1;
|
||||
}
|
||||
if (mBigramListLock.tryLock()) {
|
||||
try {
|
||||
super.addWord(
|
||||
word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
|
||||
mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED);
|
||||
// Do not insert a word as a bigram of itself
|
||||
if (word2.equals(word1)) {
|
||||
return 0;
|
||||
}
|
||||
final int freq;
|
||||
if (null == word1) {
|
||||
freq = FREQUENCY_FOR_TYPED;
|
||||
} else {
|
||||
freq = super.setBigramAndGetFrequency(
|
||||
word1, word2, new ForgettingCurveParams(isValid));
|
||||
}
|
||||
mBigramList.addBigram(word1, word2);
|
||||
return freq;
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public boolean cancelAddingUserHistory(final String word1, final String word2) {
|
||||
if (mBigramListLock.tryLock()) {
|
||||
try {
|
||||
if (mBigramList.removeBigram(word1, word2)) {
|
||||
return super.removeBigram(word1, word2);
|
||||
}
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a background thread to write any pending words to the database.
|
||||
*/
|
||||
private void flushPendingWrites() {
|
||||
// Create a background thread to write the pending entries
|
||||
new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void loadDictionaryAsync() {
|
||||
// This must be run on non-main thread
|
||||
mBigramListLock.lock();
|
||||
try {
|
||||
loadDictionaryAsyncLocked();
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private int profTotal;
|
||||
|
||||
private void loadDictionaryAsyncLocked() {
|
||||
if (DBG_STRESS_TEST) {
|
||||
try {
|
||||
Log.w(TAG, "Start stress in loading: " + mLocale);
|
||||
Thread.sleep(15000);
|
||||
Log.w(TAG, "End stress in loading");
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
}
|
||||
final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale);
|
||||
final boolean initializing = last == 0;
|
||||
final long now = System.currentTimeMillis();
|
||||
profTotal = 0;
|
||||
final String fileName = NAME + "." + mLocale + ".dict";
|
||||
final ExpandableDictionary dictionary = this;
|
||||
final OnAddWordListener listener = new OnAddWordListener() {
|
||||
@Override
|
||||
public void setUnigram(final String word, final String shortcutTarget,
|
||||
final int frequency) {
|
||||
profTotal++;
|
||||
if (DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "load unigram: " + word + "," + frequency);
|
||||
}
|
||||
dictionary.addWord(word, shortcutTarget, frequency);
|
||||
mBigramList.addBigram(null, word, (byte)frequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBigram(final String word1, final String word2, final int frequency) {
|
||||
if (word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH
|
||||
&& word2.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
profTotal++;
|
||||
if (DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency);
|
||||
}
|
||||
dictionary.setBigramAndGetFrequency(
|
||||
word1, word2, initializing ? new ForgettingCurveParams(true)
|
||||
: new ForgettingCurveParams(frequency, now, last));
|
||||
}
|
||||
mBigramList.addBigram(word1, word2, (byte)frequency);
|
||||
}
|
||||
};
|
||||
|
||||
// Load the dictionary from binary file
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
final File file = new File(getContext().getFilesDir(), fileName);
|
||||
final byte[] buffer = new byte[(int)file.length()];
|
||||
inStream = new FileInputStream(file);
|
||||
inStream.read(buffer);
|
||||
UserHistoryDictIOUtils.readDictionaryBinary(
|
||||
new ByteArrayWrapper(buffer), listener);
|
||||
} catch (FileNotFoundException e) {
|
||||
// This is an expected condition: we don't have a user history dictionary for this
|
||||
// language yet. It will be created sometime later.
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IOException on opening a bytebuffer", e);
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
try {
|
||||
inStream.close();
|
||||
} catch (IOException e) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
if (PROFILE_SAVE_RESTORE) {
|
||||
final long diff = System.currentTimeMillis() - now;
|
||||
Log.d(TAG, "PROF: Load UserHistoryDictionary: "
|
||||
+ mLocale + ", " + diff + "ms. load " + profTotal + "entries.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Async task to write pending words to the binarydicts.
|
||||
*/
|
||||
private static final class UpdateBinaryTask extends AsyncTask<Void, Void, Void>
|
||||
implements BigramDictionaryInterface {
|
||||
private final UserHistoryDictionaryBigramList mBigramList;
|
||||
private final boolean mAddLevel0Bigrams;
|
||||
private final String mLocale;
|
||||
private final UserHistoryDictionary mUserHistoryDictionary;
|
||||
private final SharedPreferences mPrefs;
|
||||
private final Context mContext;
|
||||
|
||||
public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites,
|
||||
final String locale, final UserHistoryDictionary dict,
|
||||
final SharedPreferences prefs, final Context context) {
|
||||
mBigramList = pendingWrites;
|
||||
mLocale = locale;
|
||||
mUserHistoryDictionary = dict;
|
||||
mPrefs = prefs;
|
||||
mContext = context;
|
||||
mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Void doInBackground(final Void... v) {
|
||||
if (mUserHistoryDictionary.isTest) {
|
||||
// If isTest == true, wait until the lock is released.
|
||||
mUserHistoryDictionary.mBigramListLock.lock();
|
||||
try {
|
||||
doWriteTaskLocked();
|
||||
} finally {
|
||||
mUserHistoryDictionary.mBigramListLock.unlock();
|
||||
}
|
||||
} else if (mUserHistoryDictionary.mBigramListLock.tryLock()) {
|
||||
try {
|
||||
doWriteTaskLocked();
|
||||
} finally {
|
||||
mUserHistoryDictionary.mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void doWriteTaskLocked() {
|
||||
if (DBG_STRESS_TEST) {
|
||||
try {
|
||||
Log.w(TAG, "Start stress in closing: " + mLocale);
|
||||
Thread.sleep(15000);
|
||||
Log.w(TAG, "End stress in closing");
|
||||
} catch (InterruptedException e) {
|
||||
Log.e(TAG, "In stress test", e);
|
||||
}
|
||||
}
|
||||
|
||||
final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0;
|
||||
final String fileName = NAME + "." + mLocale + ".dict";
|
||||
final File file = new File(mContext.getFilesDir(), fileName);
|
||||
FileOutputStream out = null;
|
||||
|
||||
try {
|
||||
out = new FileOutputStream(file);
|
||||
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3);
|
||||
out.flush();
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO Exception while writing file", e);
|
||||
} finally {
|
||||
if (out != null) {
|
||||
try {
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the timestamp after we finish writing the binary dictionary.
|
||||
Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale);
|
||||
if (PROFILE_SAVE_RESTORE) {
|
||||
final long diff = System.currentTimeMillis() - now;
|
||||
Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFrequency(final String word1, final String word2) {
|
||||
final int freq;
|
||||
if (word1 == null) { // unigram
|
||||
freq = FREQUENCY_FOR_TYPED;
|
||||
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
|
||||
} else { // bigram
|
||||
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
|
||||
if (nw != null) {
|
||||
final ForgettingCurveParams fcp = nw.getFcParams();
|
||||
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
|
||||
final byte fc = fcp.getFc();
|
||||
final boolean isValid = fcp.isValid();
|
||||
if (prevFc > 0 && prevFc == fc) {
|
||||
freq = fc & 0xFF;
|
||||
} else if (UserHistoryForgettingCurveUtils.
|
||||
needsToSave(fc, isValid, mAddLevel0Bigrams)) {
|
||||
freq = fc & 0xFF;
|
||||
} else {
|
||||
// Delete this entry
|
||||
freq = -1;
|
||||
}
|
||||
} else {
|
||||
// Delete this entry
|
||||
freq = -1;
|
||||
}
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
/* package for test */ void forceAddWordForTest(
|
||||
final String word1, final String word2, final boolean isValid) {
|
||||
mBigramListLock.lock();
|
||||
try {
|
||||
addToUserHistory(word1, word2, isValid);
|
||||
} finally {
|
||||
mBigramListLock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -53,7 +53,7 @@ public final class UserHistoryDictionaryBigramList {
|
|||
* Called when loaded from the SQL DB.
|
||||
*/
|
||||
public void addBigram(String word1, String word2, byte fcValue) {
|
||||
if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
|
||||
if (UserHistoryPredictionDictionary.DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "--- add bigram: " + word1 + ", " + word2 + ", " + fcValue);
|
||||
}
|
||||
final HashMap<String, Byte> map;
|
||||
|
@ -73,7 +73,7 @@ public final class UserHistoryDictionaryBigramList {
|
|||
* Called when inserted to the SQL DB.
|
||||
*/
|
||||
public void updateBigram(String word1, String word2, byte fcValue) {
|
||||
if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
|
||||
if (UserHistoryPredictionDictionary.DBG_SAVE_RESTORE) {
|
||||
Log.d(TAG, "--- update bigram: " + word1 + ", " + word2 + ", " + fcValue);
|
||||
}
|
||||
final HashMap<String, Byte> map;
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.personalization;
|
||||
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
|
||||
import android.content.Context;
|
||||
import android.content.SharedPreferences;
|
||||
|
||||
/**
|
||||
* Locally gathers stats about the words user types and various other signals like auto-correction
|
||||
* cancellation or manual picks. This allows the keyboard to adapt to the typist over time.
|
||||
*/
|
||||
public class UserHistoryPredictionDictionary extends PersonalizationPredictionDictionary {
|
||||
/* package */ UserHistoryPredictionDictionary(final Context context, final String locale,
|
||||
final SharedPreferences sp) {
|
||||
super(context, locale, sp, Dictionary.TYPE_USER_HISTORY);
|
||||
}
|
||||
}
|
|
@ -70,7 +70,7 @@ public class UserHistoryDictionaryTests extends AndroidTestCase {
|
|||
return new ArrayList<String>(wordSet);
|
||||
}
|
||||
|
||||
private void addToDict(final UserHistoryDictionary dict, final List<String> words) {
|
||||
private void addToDict(final UserHistoryPredictionDictionary dict, final List<String> words) {
|
||||
String prevWord = null;
|
||||
for (String word : words) {
|
||||
dict.forceAddWordForTest(prevWord, word, true);
|
||||
|
@ -90,8 +90,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase {
|
|||
final String locale = "testRandomWords";
|
||||
final String fileName = "UserHistoryDictionary." + locale + ".dict";
|
||||
dictFile = new File(getContext().getFilesDir(), fileName);
|
||||
final UserHistoryDictionary dict =
|
||||
PersonalizationDictionaryHelper.getUserHistoryDictionary(
|
||||
final UserHistoryPredictionDictionary dict =
|
||||
PersonalizationDictionaryHelper.getUserHistoryPredictionDictionary(
|
||||
getContext(), locale, mPrefs);
|
||||
dict.isTest = true;
|
||||
|
||||
|
@ -142,8 +142,8 @@ public class UserHistoryDictionaryTests extends AndroidTestCase {
|
|||
for (int i = 0; i < numberOfLanguageSwitching; i++) {
|
||||
final int index = i % numberOfLanguages;
|
||||
// Switch languages to locales[index].
|
||||
final UserHistoryDictionary dict =
|
||||
PersonalizationDictionaryHelper.getUserHistoryDictionary(
|
||||
final UserHistoryPredictionDictionary dict =
|
||||
PersonalizationDictionaryHelper.getUserHistoryPredictionDictionary(
|
||||
getContext(), locales[index], mPrefs);
|
||||
final List<String> words = generateWords(
|
||||
numberOfWordsIntertedForEachLanguageSwitch, random);
|
||||
|
|
Loading…
Reference in a new issue