LatinIME/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
Satoshi Kataoka d45e4b6e5b Add personalization dictionary helper
Bug: 9429906
Bug: 4192129

Change-Id: Ic618b0b09a54ed46b20633bd4c1c570d4ac775af
2013-07-29 12:37:27 +09:00

397 lines
16 KiB
Java

/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.personalization;
import android.content.Context;
import android.content.SharedPreferences;
import android.os.AsyncTask;
import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableDictionary;
import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils;
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.locks.ReentrantLock;
/**
* Locally gathers stats about the words user types and various other signals like auto-correction
* cancellation or manual picks. This allows the keyboard to adapt to the typist over time.
*/
public class UserHistoryDictionary extends ExpandableDictionary {
private static final String TAG = UserHistoryDictionary.class.getSimpleName();
private static final String NAME = UserHistoryDictionary.class.getSimpleName();
public static final boolean DBG_SAVE_RESTORE = false;
public static final boolean DBG_STRESS_TEST = false;
public static final boolean DBG_ALWAYS_WRITE = false;
public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG;
private static final FormatOptions VERSION3 = new FormatOptions(3,
true /* supportsDynamicUpdate */);
/** Any pair being typed or picked */
private static final int FREQUENCY_FOR_TYPED = 2;
/** Maximum number of pairs. Pruning will start when databases goes above this number. */
public static final int MAX_HISTORY_BIGRAMS = 10000;
/**
* When it hits maximum bigram pair, it will delete until you are left with
* only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs.
* Do not keep this number small to avoid deleting too often.
*/
public static final int DELETE_HISTORY_BIGRAMS = 1000;
/** Locale for which this user history dictionary is storing words */
private final String mLocale;
private final UserHistoryDictionaryBigramList mBigramList =
new UserHistoryDictionaryBigramList();
private final ReentrantLock mBigramListLock = new ReentrantLock();
private final SharedPreferences mPrefs;
// Should always be false except when we use this class for test
@UsedForTesting boolean isTest = false;
/* package */ UserHistoryDictionary(final Context context, final String locale,
final SharedPreferences sp) {
super(context, Dictionary.TYPE_USER_HISTORY);
mLocale = locale;
mPrefs = sp;
if (mLocale != null && mLocale.length() > 1) {
loadDictionary();
}
}
@Override
public void close() {
flushPendingWrites();
// Don't close the database as locale changes will require it to be reopened anyway
// Also, the database is written to somewhat frequently, so it needs to be kept alive
// throughout the life of the process.
// mOpenHelper.close();
// Ignore close because we cache UserHistoryDictionary for each language. See getInstance()
// above.
// super.close();
}
@Override
protected ArrayList<SuggestedWordInfo> getWordsInner(final WordComposer composer,
final String prevWord, final ProximityInfo proximityInfo) {
// Inhibit suggestions (not predictions) for user history for now. Removing this method
// is enough to use it through the standard ExpandableDictionary way.
return null;
}
/**
* Return whether the passed charsequence is in the dictionary.
*/
@Override
public synchronized boolean isValidWord(final String word) {
// TODO: figure out what is the correct thing to do here.
return false;
}
/**
* Pair will be added to the user history dictionary.
*
* The first word may be null. That means we don't know the context, in other words,
* it's only a unigram. The first word may also be an empty string : this means start
* context, as in beginning of a sentence for example.
* The second word may not be null (a NullPointerException would be thrown).
*/
public int addToUserHistory(final String word1, final String word2, final boolean isValid) {
if (word2.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH ||
(word1 != null && word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return -1;
}
if (mBigramListLock.tryLock()) {
try {
super.addWord(
word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED);
mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED);
// Do not insert a word as a bigram of itself
if (word2.equals(word1)) {
return 0;
}
final int freq;
if (null == word1) {
freq = FREQUENCY_FOR_TYPED;
} else {
freq = super.setBigramAndGetFrequency(
word1, word2, new ForgettingCurveParams(isValid));
}
mBigramList.addBigram(word1, word2);
return freq;
} finally {
mBigramListLock.unlock();
}
}
return -1;
}
public boolean cancelAddingUserHistory(final String word1, final String word2) {
if (mBigramListLock.tryLock()) {
try {
if (mBigramList.removeBigram(word1, word2)) {
return super.removeBigram(word1, word2);
}
} finally {
mBigramListLock.unlock();
}
}
return false;
}
/**
* Schedules a background thread to write any pending words to the database.
*/
private void flushPendingWrites() {
// Create a background thread to write the pending entries
new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute();
}
@Override
public void loadDictionaryAsync() {
// This must be run on non-main thread
mBigramListLock.lock();
try {
loadDictionaryAsyncLocked();
} finally {
mBigramListLock.unlock();
}
}
private int profTotal;
private void loadDictionaryAsyncLocked() {
if (DBG_STRESS_TEST) {
try {
Log.w(TAG, "Start stress in loading: " + mLocale);
Thread.sleep(15000);
Log.w(TAG, "End stress in loading");
} catch (InterruptedException e) {
}
}
final long last = Settings.readLastUserHistoryWriteTime(mPrefs, mLocale);
final boolean initializing = last == 0;
final long now = System.currentTimeMillis();
profTotal = 0;
final String fileName = NAME + "." + mLocale + ".dict";
final ExpandableDictionary dictionary = this;
final OnAddWordListener listener = new OnAddWordListener() {
@Override
public void setUnigram(final String word, final String shortcutTarget,
final int frequency) {
profTotal++;
if (DBG_SAVE_RESTORE) {
Log.d(TAG, "load unigram: " + word + "," + frequency);
}
dictionary.addWord(word, shortcutTarget, frequency);
mBigramList.addBigram(null, word, (byte)frequency);
}
@Override
public void setBigram(final String word1, final String word2, final int frequency) {
if (word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH
&& word2.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) {
profTotal++;
if (DBG_SAVE_RESTORE) {
Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency);
}
dictionary.setBigramAndGetFrequency(
word1, word2, initializing ? new ForgettingCurveParams(true)
: new ForgettingCurveParams(frequency, now, last));
}
mBigramList.addBigram(word1, word2, (byte)frequency);
}
};
// Load the dictionary from binary file
FileInputStream inStream = null;
try {
final File file = new File(getContext().getFilesDir(), fileName);
final byte[] buffer = new byte[(int)file.length()];
inStream = new FileInputStream(file);
inStream.read(buffer);
UserHistoryDictIOUtils.readDictionaryBinary(
new ByteArrayWrapper(buffer), listener);
} catch (FileNotFoundException e) {
// This is an expected condition: we don't have a user history dictionary for this
// language yet. It will be created sometime later.
} catch (IOException e) {
Log.e(TAG, "IOException on opening a bytebuffer", e);
} finally {
if (inStream != null) {
try {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
if (PROFILE_SAVE_RESTORE) {
final long diff = System.currentTimeMillis() - now;
Log.d(TAG, "PROF: Load UserHistoryDictionary: "
+ mLocale + ", " + diff + "ms. load " + profTotal + "entries.");
}
}
}
/**
* Async task to write pending words to the binarydicts.
*/
private static final class UpdateBinaryTask extends AsyncTask<Void, Void, Void>
implements BigramDictionaryInterface {
private final UserHistoryDictionaryBigramList mBigramList;
private final boolean mAddLevel0Bigrams;
private final String mLocale;
private final UserHistoryDictionary mUserHistoryDictionary;
private final SharedPreferences mPrefs;
private final Context mContext;
public UpdateBinaryTask(final UserHistoryDictionaryBigramList pendingWrites,
final String locale, final UserHistoryDictionary dict,
final SharedPreferences prefs, final Context context) {
mBigramList = pendingWrites;
mLocale = locale;
mUserHistoryDictionary = dict;
mPrefs = prefs;
mContext = context;
mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS;
}
@Override
protected Void doInBackground(final Void... v) {
if (mUserHistoryDictionary.isTest) {
// If isTest == true, wait until the lock is released.
mUserHistoryDictionary.mBigramListLock.lock();
try {
doWriteTaskLocked();
} finally {
mUserHistoryDictionary.mBigramListLock.unlock();
}
} else if (mUserHistoryDictionary.mBigramListLock.tryLock()) {
try {
doWriteTaskLocked();
} finally {
mUserHistoryDictionary.mBigramListLock.unlock();
}
}
return null;
}
private void doWriteTaskLocked() {
if (DBG_STRESS_TEST) {
try {
Log.w(TAG, "Start stress in closing: " + mLocale);
Thread.sleep(15000);
Log.w(TAG, "End stress in closing");
} catch (InterruptedException e) {
Log.e(TAG, "In stress test", e);
}
}
final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0;
final String fileName = NAME + "." + mLocale + ".dict";
final File file = new File(mContext.getFilesDir(), fileName);
FileOutputStream out = null;
try {
out = new FileOutputStream(file);
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3);
out.flush();
out.close();
} catch (IOException e) {
Log.e(TAG, "IO Exception while writing file", e);
} finally {
if (out != null) {
try {
out.close();
} catch (IOException e) {
// ignore
}
}
}
// Save the timestamp after we finish writing the binary dictionary.
Settings.writeLastUserHistoryWriteTime(mPrefs, mLocale);
if (PROFILE_SAVE_RESTORE) {
final long diff = System.currentTimeMillis() - now;
Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms.");
}
}
@Override
public int getFrequency(final String word1, final String word2) {
final int freq;
if (word1 == null) { // unigram
freq = FREQUENCY_FOR_TYPED;
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
} else { // bigram
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
if (nw != null) {
final ForgettingCurveParams fcp = nw.getFcParams();
final byte prevFc = mBigramList.getBigrams(word1).get(word2);
final byte fc = fcp.getFc();
final boolean isValid = fcp.isValid();
if (prevFc > 0 && prevFc == fc) {
freq = fc & 0xFF;
} else if (UserHistoryForgettingCurveUtils.
needsToSave(fc, isValid, mAddLevel0Bigrams)) {
freq = fc & 0xFF;
} else {
// Delete this entry
freq = -1;
}
} else {
// Delete this entry
freq = -1;
}
}
return freq;
}
}
@UsedForTesting
/* package for test */ void forceAddWordForTest(
final String word1, final String word2, final boolean isValid) {
mBigramListLock.lock();
try {
addToUserHistory(word1, word2, isValid);
} finally {
mBigramListLock.unlock();
}
}
}