From 84d858ed5e187eb9d4b56b593e1d9287f762bbca Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Mon, 20 Aug 2012 19:29:20 +0900 Subject: [PATCH] Use BinaryDictInputOutput to save UserHistoryDictionary. bug: 6669677 Change-Id: I08193c26f76dbd48168f8ac02c1b737525bfc7b2 --- .../latin/UserHistoryDictIOUtils.java | 28 +- .../latin/UserHistoryDictionary.java | 484 ++++++------------ .../latin/makedict/BinaryDictIOUtils.java | 5 +- .../latin/UserHistoryDictionaryTests.java | 109 ++++ 4 files changed, 271 insertions(+), 355 deletions(-) create mode 100644 tests/src/com/android/inputmethod/latin/UserHistoryDictionaryTests.java diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java index 05255a6b3..519165dd0 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictIOUtils.java @@ -115,11 +115,10 @@ public class UserHistoryDictIOUtils { public static void writeDictionaryBinary(final OutputStream destination, final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, final FormatOptions formatOptions) { - final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); - try { BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions); + Log.d(TAG, "end writing"); } catch (IOException e) { Log.e(TAG, "IO exception while writing file: " + e); } catch (UnsupportedFormatException e) { @@ -132,16 +131,18 @@ public class UserHistoryDictIOUtils { */ /* packages for test */ static FusionDictionary constructFusionDictionary( final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { - final FusionDictionary fusionDict = new FusionDictionary(new Node(), - new FusionDictionary.DictionaryOptions( - new HashMap(), false, false)); - + new FusionDictionary.DictionaryOptions(new HashMap(), false, + false)); + int profTotal = 0; for (final String word1 : bigrams.keySet()) { final HashMap word1Bigrams = bigrams.getBigrams(word1); for (final String word2 : word1Bigrams.keySet()) { final int freq = dict.getFrequency(word1, word2); - + if (freq == -1) { + // don't add this bigram. + continue; + } if (DEBUG) { if (word1 == null) { Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); @@ -149,17 +150,22 @@ public class UserHistoryDictIOUtils { Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + Integer.toString(freq)); } + profTotal++; } - if (word1 == null) { // unigram fusionDict.add(word2, freq, null, false /* isNotAWord */); } else { // bigram + if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) { + fusionDict.add(word1, 2, null, false /* isNotAWord */); + } fusionDict.setBigram(word1, word2, freq); } bigrams.updateBigram(word1, word2, (byte)freq); } } - + if (DEBUG) { + Log.d(TAG, "add " + profTotal + "words"); + } return fusionDict; } @@ -171,7 +177,6 @@ public class UserHistoryDictIOUtils { final Map unigrams = CollectionUtils.newTreeMap(); final Map frequencies = CollectionUtils.newTreeMap(); final Map> bigrams = CollectionUtils.newTreeMap(); - try { BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies, bigrams); @@ -189,14 +194,11 @@ public class UserHistoryDictIOUtils { /* package for test */ static void addWordsFromWordMap(final Map unigrams, final Map frequencies, final Map> bigrams, final OnAddWordListener to) { - for (Map.Entry entry : unigrams.entrySet()) { final String word1 = entry.getValue(); final int unigramFrequency = frequencies.get(entry.getKey()); to.setUnigram(word1, null, unigramFrequency); - final ArrayList attrList = bigrams.get(entry.getKey()); - if (attrList != null) { for (final PendingAttribute attr : attrList) { to.setBigram(word1, unigrams.get(attr.mAddress), diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java index e03af649c..202aa642f 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java @@ -16,24 +16,25 @@ package com.android.inputmethod.latin; -import android.content.ContentValues; import android.content.Context; import android.content.SharedPreferences; -import android.database.Cursor; -import android.database.sqlite.SQLiteDatabase; -import android.database.sqlite.SQLiteOpenHelper; -import android.database.sqlite.SQLiteQueryBuilder; import android.os.AsyncTask; -import android.provider.BaseColumns; import android.util.Log; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; +import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface; +import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener; import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; import java.lang.ref.SoftReference; import java.util.ArrayList; -import java.util.HashMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReentrantLock; @@ -43,45 +44,27 @@ import java.util.concurrent.locks.ReentrantLock; */ public class UserHistoryDictionary extends ExpandableDictionary { private static final String TAG = UserHistoryDictionary.class.getSimpleName(); + private static final String NAME = UserHistoryDictionary.class.getSimpleName(); public static final boolean DBG_SAVE_RESTORE = false; public static final boolean DBG_STRESS_TEST = false; public static final boolean DBG_ALWAYS_WRITE = false; public static final boolean PROFILE_SAVE_RESTORE = LatinImeLogger.sDBG; + private static final FormatOptions VERSION3 = new FormatOptions(3, + true /* supportsDynamicUpdate */); + /** Any pair being typed or picked */ private static final int FREQUENCY_FOR_TYPED = 2; /** Maximum number of pairs. Pruning will start when databases goes above this number. */ - public static final int sMaxHistoryBigrams = 10000; + public static final int MAX_HISTORY_BIGRAMS = 10000; /** * When it hits maximum bigram pair, it will delete until you are left with * only (sMaxHistoryBigrams - sDeleteHistoryBigrams) pairs. * Do not keep this number small to avoid deleting too often. */ - public static final int sDeleteHistoryBigrams = 1000; - - /** - * Database version should increase if the database structure changes - */ - private static final int DATABASE_VERSION = 1; - - private static final String DATABASE_NAME = "userbigram_dict.db"; - - /** Name of the words table in the database */ - private static final String MAIN_TABLE_NAME = "main"; - // TODO: Consume less space by using a unique id for locale instead of the whole - // 2-5 character string. - private static final String MAIN_COLUMN_ID = BaseColumns._ID; - private static final String MAIN_COLUMN_WORD1 = "word1"; - private static final String MAIN_COLUMN_WORD2 = "word2"; - private static final String MAIN_COLUMN_LOCALE = "locale"; - - /** Name of the frequency table in the database */ - private static final String FREQ_TABLE_NAME = "frequency"; - private static final String FREQ_COLUMN_ID = BaseColumns._ID; - private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; - private static final String COLUMN_FORGETTING_CURVE_VALUE = "freq"; + public static final int DELETE_HISTORY_BIGRAMS = 1000; /** Locale for which this user history dictionary is storing words */ private final String mLocale; @@ -91,29 +74,13 @@ public class UserHistoryDictionary extends ExpandableDictionary { private final ReentrantLock mBigramListLock = new ReentrantLock(); private final SharedPreferences mPrefs; - private final static HashMap sDictProjectionMap; - private final static ConcurrentHashMap> + // Should always be false except when we use this class for test + /* package for test */ boolean isTest = false; + + private static final ConcurrentHashMap> sLangDictCache = CollectionUtils.newConcurrentHashMap(); - static { - sDictProjectionMap = CollectionUtils.newHashMap(); - sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); - sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); - sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); - sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); - - sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); - sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); - sDictProjectionMap.put(COLUMN_FORGETTING_CURVE_VALUE, COLUMN_FORGETTING_CURVE_VALUE); - } - - private static DatabaseHelper sOpenHelper = null; - - public String getLocale() { - return mLocale; - } - - public synchronized static UserHistoryDictionary getInstance( + public static synchronized UserHistoryDictionary getInstance( final Context context, final String locale, final SharedPreferences sp) { if (sLangDictCache.containsKey(locale)) { final SoftReference ref = sLangDictCache.get(locale); @@ -136,9 +103,6 @@ public class UserHistoryDictionary extends ExpandableDictionary { super(context, Dictionary.TYPE_USER_HISTORY); mLocale = locale; mPrefs = sp; - if (sOpenHelper == null) { - sOpenHelper = new DatabaseHelper(getContext()); - } if (mLocale != null && mLocale.length() > 1) { loadDictionary(); } @@ -190,6 +154,7 @@ public class UserHistoryDictionary extends ExpandableDictionary { try { super.addWord( word2, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED); + mBigramList.addBigram(null, word2, (byte)FREQUENCY_FOR_TYPED); // Do not insert a word as a bigram of itself if (word2.equals(word1)) { return 0; @@ -227,11 +192,8 @@ public class UserHistoryDictionary extends ExpandableDictionary { * Schedules a background thread to write any pending words to the database. */ private void flushPendingWrites() { - if (mBigramListLock.isLocked()) { - return; - } // Create a background thread to write the pending entries - new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this, mPrefs).execute(); + new UpdateBinaryTask(mBigramList, mLocale, this, mPrefs, getContext()).execute(); } @Override @@ -245,6 +207,8 @@ public class UserHistoryDictionary extends ExpandableDictionary { } } + private int profTotal; + private void loadDictionaryAsyncLocked() { if (DBG_STRESS_TEST) { try { @@ -257,343 +221,181 @@ public class UserHistoryDictionary extends ExpandableDictionary { final long last = SettingsValues.getLastUserHistoryWriteTime(mPrefs, mLocale); final boolean initializing = last == 0; final long now = System.currentTimeMillis(); - // Load the words that correspond to the current input locale - final Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); - if (null == cursor) return; - try { - // TODO: Call SQLiteDataBase.beginTransaction / SQLiteDataBase.endTransaction - if (cursor.moveToFirst()) { - final int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); - final int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); - final int fcIndex = cursor.getColumnIndex(COLUMN_FORGETTING_CURVE_VALUE); - while (!cursor.isAfterLast()) { - final String word1 = cursor.getString(word1Index); - final String word2 = cursor.getString(word2Index); - final int fc = cursor.getInt(fcIndex); + profTotal = 0; + final String fileName = NAME + "." + mLocale + ".dict"; + final ExpandableDictionary dictionary = this; + final OnAddWordListener listener = new OnAddWordListener() { + @Override + public void setUnigram(String word, String shortcutTarget, int frequency) { + profTotal++; + if (DBG_SAVE_RESTORE) { + Log.d(TAG, "load unigram: " + word + "," + frequency); + } + dictionary.addWord(word, shortcutTarget, frequency); + mBigramList.addBigram(null, word, (byte)frequency); + } + + @Override + public void setBigram(String word1, String word2, int frequency) { + if (word1.length() < BinaryDictionary.MAX_WORD_LENGTH + && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { + profTotal++; if (DBG_SAVE_RESTORE) { - Log.d(TAG, "--- Load user history: " + word1 + ", " + word2 + "," - + mLocale + "," + this); + Log.d(TAG, "load bigram: " + word1 + "," + word2 + "," + frequency); } - // Safeguard against adding really long words. Stack may overflow due - // to recursive lookup - if (null == word1) { - super.addWord(word2, null /* shortcut */, fc); - } else if (word1.length() < BinaryDictionary.MAX_WORD_LENGTH - && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { - super.setBigramAndGetFrequency( - word1, word2, initializing ? new ForgettingCurveParams(true) - : new ForgettingCurveParams(fc, now, last)); - } - mBigramList.addBigram(word1, word2, (byte)fc); - cursor.moveToNext(); + dictionary.setBigramAndGetFrequency( + word1, word2, initializing ? new ForgettingCurveParams(true) + : new ForgettingCurveParams(frequency, now, last)); + } + mBigramList.addBigram(word1, word2, (byte)frequency); + } + }; + + // Load the dictionary from binary file + FileInputStream inStream = null; + try { + final File file = new File(getContext().getFilesDir(), fileName); + final byte[] buffer = new byte[(int)file.length()]; + inStream = new FileInputStream(file); + inStream.read(buffer); + UserHistoryDictIOUtils.readDictionaryBinary( + new UserHistoryDictIOUtils.ByteArrayWrapper(buffer), listener); + } catch (FileNotFoundException e) { + Log.e(TAG, "when loading: file not found" + e); + } catch (IOException e) { + Log.e(TAG, "IOException when open bytebuffer: " + e); + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing } } - } finally { - cursor.close(); if (PROFILE_SAVE_RESTORE) { final long diff = System.currentTimeMillis() - now; - Log.w(TAG, "PROF: Load User HistoryDictionary: " - + mLocale + ", " + diff + "ms."); + Log.d(TAG, "PROF: Load UserHistoryDictionary: " + + mLocale + ", " + diff + "ms. load " + profTotal + "entries."); } } } /** - * Query the database + * Async task to write pending words to the binarydicts. */ - private static Cursor query(String selection, String[] selectionArgs) { - SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); - - // main INNER JOIN frequency ON (main._id=freq.pair_id) - qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" - + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." - + FREQ_COLUMN_PAIR_ID +")"); - - qb.setProjectionMap(sDictProjectionMap); - - // Get the database and run the query - try { - SQLiteDatabase db = sOpenHelper.getReadableDatabase(); - Cursor c = qb.query(db, - new String[] { - MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, COLUMN_FORGETTING_CURVE_VALUE }, - selection, selectionArgs, null, null, null); - return c; - } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { - // Can't open the database : presumably we can't access storage. That may happen - // when the device is wedged; do a best effort to still start the keyboard. - return null; - } - } - - /** - * This class helps open, create, and upgrade the database file. - */ - private static class DatabaseHelper extends SQLiteOpenHelper { - - DatabaseHelper(Context context) { - super(context, DATABASE_NAME, null, DATABASE_VERSION); - } - - @Override - public void onCreate(SQLiteDatabase db) { - db.execSQL("PRAGMA foreign_keys = ON;"); - db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" - + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," - + MAIN_COLUMN_WORD1 + " TEXT," - + MAIN_COLUMN_WORD2 + " TEXT," - + MAIN_COLUMN_LOCALE + " TEXT" - + ");"); - db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" - + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," - + FREQ_COLUMN_PAIR_ID + " INTEGER," - + COLUMN_FORGETTING_CURVE_VALUE + " INTEGER," - + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME - + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" - + ");"); - } - - @Override - public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { - Log.w(TAG, "Upgrading database from version " + oldVersion + " to " - + newVersion + ", which will destroy all old data"); - db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); - db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); - onCreate(db); - } - } - - /** - * Async task to write pending words to the database so that it stays in sync with - * the in-memory trie. - */ - private static class UpdateDbTask extends AsyncTask { + private static class UpdateBinaryTask extends AsyncTask + implements BigramDictionaryInterface { private final UserHistoryDictionaryBigramList mBigramList; - private final DatabaseHelper mDbHelper; + private final boolean mAddLevel0Bigrams; private final String mLocale; private final UserHistoryDictionary mUserHistoryDictionary; private final SharedPreferences mPrefs; + private final Context mContext; - public UpdateDbTask( - DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites, - String locale, UserHistoryDictionary dict, SharedPreferences prefs) { + public UpdateBinaryTask(UserHistoryDictionaryBigramList pendingWrites, String locale, + UserHistoryDictionary dict, SharedPreferences prefs, Context context) { mBigramList = pendingWrites; mLocale = locale; - mDbHelper = openHelper; mUserHistoryDictionary = dict; mPrefs = prefs; - } - - /** Prune any old data if the database is getting too big. */ - private static void checkPruneData(SQLiteDatabase db) { - db.execSQL("PRAGMA foreign_keys = ON;"); - Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, - null, null, null, null, null); - try { - int totalRowCount = c.getCount(); - // prune out old data if we have too much data - if (totalRowCount > sMaxHistoryBigrams) { - int numDeleteRows = (totalRowCount - sMaxHistoryBigrams) - + sDeleteHistoryBigrams; - int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); - c.moveToFirst(); - int count = 0; - while (count < numDeleteRows && !c.isAfterLast()) { - String pairId = c.getString(pairIdColumnId); - // Deleting from MAIN table will delete the frequencies - // due to FOREIGN KEY .. ON DELETE CASCADE - db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", - new String[] { pairId }); - c.moveToNext(); - count++; - } - } - } finally { - c.close(); - } + mContext = context; + mAddLevel0Bigrams = mBigramList.size() <= MAX_HISTORY_BIGRAMS; } @Override protected Void doInBackground(Void... v) { - SQLiteDatabase db = null; - if (mUserHistoryDictionary.mBigramListLock.tryLock()) { + if (mUserHistoryDictionary.isTest) { + // If isTest == true, wait until the lock is released. + mUserHistoryDictionary.mBigramListLock.lock(); try { - try { - db = mDbHelper.getWritableDatabase(); - } catch (android.database.sqlite.SQLiteCantOpenDatabaseException e) { - // If we can't open the db, don't do anything. Exit through the next test - // for non-nullity of the db variable. - } - if (null == db) { - // Not much we can do. Just exit. - return null; - } - db.beginTransaction(); - return doLoadTaskLocked(db); + doWriteTaskLocked(); } finally { - if (db != null) { - db.endTransaction(); - } mUserHistoryDictionary.mBigramListLock.unlock(); } + } else if (mUserHistoryDictionary.mBigramListLock.tryLock()) { + doWriteTaskLocked(); } return null; } - private Void doLoadTaskLocked(SQLiteDatabase db) { + private void doWriteTaskLocked() { if (DBG_STRESS_TEST) { try { Log.w(TAG, "Start stress in closing: " + mLocale); Thread.sleep(15000); Log.w(TAG, "End stress in closing"); } catch (InterruptedException e) { + Log.e(TAG, "In stress test: " + e); } } + final long now = PROFILE_SAVE_RESTORE ? System.currentTimeMillis() : 0; - int profTotal = 0; - int profInsert = 0; - int profDelete = 0; - db.execSQL("PRAGMA foreign_keys = ON;"); - final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams; + final String fileName = NAME + "." + mLocale + ".dict"; + final File file = new File(mContext.getFilesDir(), fileName); + FileOutputStream out = null; - // Write all the entries to the db - for (String word1 : mBigramList.keySet()) { - final HashMap word1Bigrams = mBigramList.getBigrams(word1); - for (String word2 : word1Bigrams.keySet()) { - if (PROFILE_SAVE_RESTORE) { - ++profTotal; - } - // Get new frequency. Do not insert unigrams/bigrams which freq is "-1". - final int freq; // -1, or 0~255 - if (word1 == null) { // unigram - freq = FREQUENCY_FOR_TYPED; - final byte prevFc = word1Bigrams.get(word2); - if (prevFc == FREQUENCY_FOR_TYPED) { - // No need to update since we found no changes for this entry. - // Just skip to the next entry. - if (DBG_SAVE_RESTORE) { - Log.d(TAG, "Skip update user history: " + word1 + "," + word2 - + "," + prevFc); - } - if (!DBG_ALWAYS_WRITE) { - continue; - } - } - } else { // bigram - final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); - if (nw != null) { - final ForgettingCurveParams fcp = nw.getFcParams(); - final byte prevFc = word1Bigrams.get(word2); - final byte fc = (byte)fcp.getFc(); - final boolean isValid = fcp.isValid(); - if (prevFc > 0 && prevFc == fc) { - // No need to update since we found no changes for this entry. - // Just skip to the next entry. - if (DBG_SAVE_RESTORE) { - Log.d(TAG, "Skip update user history: " + word1 + "," - + word2 + "," + prevFc); - } - if (!DBG_ALWAYS_WRITE) { - continue; - } else { - freq = fc; - } - } else if (UserHistoryForgettingCurveUtils. - needsToSave(fc, isValid, addLevel0Bigram)) { - freq = fc; - } else { - // Delete this entry - freq = -1; - } - } else { - // Delete this entry - freq = -1; - } - } - // TODO: this process of making a text search for each pair each time - // is terribly inefficient. Optimize this. - // Find pair id - Cursor c = null; + try { + out = new FileOutputStream(file); + UserHistoryDictIOUtils.writeDictionaryBinary(out, this, mBigramList, VERSION3); + out.flush(); + out.close(); + } catch (IOException e) { + Log.e(TAG, "IO Exception while writing file: " + e); + } finally { + if (out != null) { try { - if (null != word1) { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " - + MAIN_COLUMN_LOCALE + "=?", - new String[] { word1, word2, mLocale }, null, null, - null); - } else { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 - + "=? AND " + MAIN_COLUMN_LOCALE + "=?", - new String[] { word2, mLocale }, null, null, null); - } - - final int pairId; - if (c.moveToFirst()) { - if (PROFILE_SAVE_RESTORE) { - ++profDelete; - } - // Delete existing pair - pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); - db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", - new String[] { Integer.toString(pairId) }); - } else { - // Create new pair - Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, - getContentValues(word1, word2, mLocale)); - pairId = pairIdLong.intValue(); - } - // Eliminate freq == 0 because that word is profanity. - if (freq > 0) { - if (PROFILE_SAVE_RESTORE) { - ++profInsert; - } - if (DBG_SAVE_RESTORE) { - Log.d(TAG, "--- Save user history: " + word1 + ", " + word2 - + mLocale + "," + this); - } - // Insert new frequency - db.insert(FREQ_TABLE_NAME, null, - getFrequencyContentValues(pairId, freq)); - // Update an existing bigram entry in mBigramList too in order to - // synchronize the SQL DB and mBigramList. - mBigramList.updateBigram(word1, word2, (byte)freq); - } - } finally { - if (c != null) { - c.close(); - } + out.close(); + } catch (IOException e) { + // ignore } } } - checkPruneData(db); - // Save the timestamp after we finish writing the SQL DB. + // Save the timestamp after we finish writing the binary dictionary. SettingsValues.setLastUserHistoryWriteTime(mPrefs, mLocale); if (PROFILE_SAVE_RESTORE) { final long diff = System.currentTimeMillis() - now; - Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", "+ diff - + "ms. Total: " + profTotal + ". Insert: " + profInsert + ". Delete: " - + profDelete); + Log.w(TAG, "PROF: Write User HistoryDictionary: " + mLocale + ", " + diff + "ms."); } - db.setTransactionSuccessful(); - return null; } - private static ContentValues getContentValues(String word1, String word2, String locale) { - ContentValues values = new ContentValues(3); - values.put(MAIN_COLUMN_WORD1, word1); - values.put(MAIN_COLUMN_WORD2, word2); - values.put(MAIN_COLUMN_LOCALE, locale); - return values; - } - - private static ContentValues getFrequencyContentValues(int pairId, int frequency) { - ContentValues values = new ContentValues(2); - values.put(FREQ_COLUMN_PAIR_ID, pairId); - values.put(COLUMN_FORGETTING_CURVE_VALUE, frequency); - return values; + @Override + public int getFrequency(String word1, String word2) { + final int freq; + if (word1 == null) { // unigram + freq = FREQUENCY_FOR_TYPED; + final byte prevFc = mBigramList.getBigrams(word1).get(word2); + } else { // bigram + final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); + if (nw != null) { + final ForgettingCurveParams fcp = nw.getFcParams(); + final byte prevFc = mBigramList.getBigrams(word1).get(word2); + final byte fc = fcp.getFc(); + final boolean isValid = fcp.isValid(); + if (prevFc > 0 && prevFc == fc) { + freq = ((int)fc) & 0xFF; + } else if (UserHistoryForgettingCurveUtils. + needsToSave(fc, isValid, mAddLevel0Bigrams)) { + freq = ((int)fc) & 0xFF; + } else { + // Delete this entry + freq = -1; + } + } else { + // Delete this entry + freq = -1; + } + } + return freq; } } + void forceAddWordForTest(final String word1, final String word2, final boolean isValid) { + mBigramListLock.lock(); + try { + addToUserHistory(word1, word2, isValid); + } finally { + mBigramListLock.unlock(); + } + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index b97be0543..a1606db60 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -77,7 +77,10 @@ public class BinaryDictIOUtils { p.mAddress += BinaryDictInputOutput.getGroupCountSize(p.mNumOfCharGroup); p.mPosition = 0; } - + if (p.mNumOfCharGroup == 0) { + stack.pop(); + continue; + } CharGroupInfo info = BinaryDictInputOutput.readCharGroup(buffer, p.mAddress - headerSize, formatOptions); for (int i = 0; i < info.mCharacters.length; ++i) { diff --git a/tests/src/com/android/inputmethod/latin/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/UserHistoryDictionaryTests.java new file mode 100644 index 000000000..f2a17d206 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/UserHistoryDictionaryTests.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import com.android.inputmethod.latin.UserHistoryDictionary; + +import android.content.SharedPreferences; +import android.preference.PreferenceManager; +import android.test.AndroidTestCase; +import android.util.Log; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.Set; + +/** + * Unit tests for UserHistoryDictionary + */ +public class UserHistoryDictionaryTests extends AndroidTestCase { + private static final String TAG = UserHistoryDictionaryTests.class.getSimpleName(); + private SharedPreferences mPrefs; + + private static final String[] CHARACTERS = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" + }; + + @Override + public void setUp() { + mPrefs = PreferenceManager.getDefaultSharedPreferences(getContext()); + } + + /** + * Generates a random word. + */ + private String generateWord(final int value) { + final int lengthOfChars = CHARACTERS.length; + StringBuilder builder = new StringBuilder(); + long lvalue = Math.abs((long)value); + while (lvalue > 0) { + builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]); + lvalue /= lengthOfChars; + } + return builder.toString(); + } + + private List generateWords(final int number, final Random random) { + final Set wordSet = CollectionUtils.newHashSet(); + while (wordSet.size() < number) { + wordSet.add(generateWord(random.nextInt())); + } + return new ArrayList(wordSet); + } + + private void addToDict(final UserHistoryDictionary dict, final List words) { + String prevWord = null; + for (String word : words) { + dict.forceAddWordForTest(prevWord, word, true); + prevWord = word; + } + } + + public void testRandomWords() { + Log.d(TAG, "This test can be used for profiling."); + Log.d(TAG, "Usage: please set UserHisotoryDictionary.PROFILE_SAVE_RESTORE to true."); + final int numberOfWords = 1000; + final Random random = new Random(123456); + List words = generateWords(numberOfWords, random); + + final String locale = "testRandomWords"; + final UserHistoryDictionary dict = UserHistoryDictionary.getInstance(getContext(), + locale, mPrefs); + dict.isTest = true; + + addToDict(dict, words); + + try { + Log.d(TAG, "waiting for adding the word ..."); + Thread.sleep(2000); + } catch (InterruptedException e) { + Log.d(TAG, "InterruptedException: " + e); + } + + // write to file + dict.close(); + + try { + Log.d(TAG, "waiting for writing ..."); + Thread.sleep(5000); + } catch (InterruptedException e) { + Log.d(TAG, "InterruptedException: " + e); + } + } +}