From bc5688506229bd5cd5e6f4dcdc73c21dc6b80ecb Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Mon, 28 May 2012 19:08:34 +0900 Subject: [PATCH] Optimize the store of bigram list Bug: 4192129 Change-Id: Idcc62e4f9696b56b1d7013891b2da37b1784423e --- .../latin/UserHistoryDictionary.java | 183 ++++++++---------- .../UserHistoryDictionaryBigramList.java | 91 +++++++++ .../UserHistoryForgettingCurveUtils.java | 11 +- 3 files changed, 179 insertions(+), 106 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java index fa3d1be11..d5163f2a1 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionary.java @@ -30,8 +30,6 @@ import android.util.Log; import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; /** * Locally gathers stats about the words user types and various other signals like auto-correction @@ -39,6 +37,7 @@ import java.util.Iterator; */ public class UserHistoryDictionary extends ExpandableDictionary { private static final String TAG = "UserHistoryDictionary"; + public static final boolean DBG_SAVE_RESTORE = false; /** Any pair being typed or picked */ private static final int FREQUENCY_FOR_TYPED = 2; @@ -78,7 +77,8 @@ public class UserHistoryDictionary extends ExpandableDictionary { /** Locale for which this auto dictionary is storing words */ private String mLocale; - private HashSet mPendingWrites = new HashSet(); + private UserHistoryDictionaryBigramList mBigramList = + new UserHistoryDictionaryBigramList(); private final Object mPendingWritesLock = new Object(); private static volatile boolean sUpdatingDB = false; private final SharedPreferences mPrefs; @@ -99,35 +99,6 @@ public class UserHistoryDictionary extends ExpandableDictionary { private static DatabaseHelper sOpenHelper = null; - private static class Bigram { - public final String mWord1; - public final String mWord2; - - Bigram(String word1, String word2) { - this.mWord1 = word1; - this.mWord2 = word2; - } - - @Override - public boolean equals(Object bigram) { - if (!(bigram instanceof Bigram)) { - return false; - } - final Bigram bigram2 = (Bigram) bigram; - final boolean eq1 = - mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1); - if (!eq1) { - return false; - } - return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2); - } - - @Override - public int hashCode() { - return (mWord1 + " " + mWord2).hashCode(); - } - } - public void setDatabaseMax(int maxHistoryBigram) { sMaxHistoryBigrams = maxHistoryBigram; } @@ -190,20 +161,17 @@ public class UserHistoryDictionary extends ExpandableDictionary { freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams()); } synchronized (mPendingWritesLock) { - final Bigram bi = new Bigram(word1, word2); - if (!mPendingWrites.contains(bi)) { - mPendingWrites.add(bi); - } + mBigramList.addBigram(word1, word2); } return freq; } public boolean cancelAddingUserHistory(String word1, String word2) { - final Bigram bi = new Bigram(word1, word2); - if (mPendingWrites.contains(bi)) { - mPendingWrites.remove(bi); - return super.removeBigram(word1, word2); + synchronized (mPendingWritesLock) { + if (mBigramList.removeBigram(word1, word2)) { + return super.removeBigram(word1, word2); + } } return false; } @@ -214,11 +182,11 @@ public class UserHistoryDictionary extends ExpandableDictionary { private void flushPendingWrites() { synchronized (mPendingWritesLock) { // Nothing pending? Return - if (mPendingWrites.isEmpty()) return; + if (mBigramList.isEmpty()) return; // Create a background thread to write the pending entries - new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute(); + new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute(); // Create a new map for writing new entries into while the old one is written to db - mPendingWrites = new HashSet(); + mBigramList = new UserHistoryDictionaryBigramList(); } } @@ -251,6 +219,9 @@ public class UserHistoryDictionary extends ExpandableDictionary { final String word1 = cursor.getString(word1Index); final String word2 = cursor.getString(word2Index); final int frequency = cursor.getInt(frequencyIndex); + if (DBG_SAVE_RESTORE) { + Log.d(TAG, "--- Load user history: " + word1 + ", " + word2); + } // Safeguard against adding really long words. Stack may overflow due // to recursive lookup if (null == word1) { @@ -259,8 +230,9 @@ public class UserHistoryDictionary extends ExpandableDictionary { && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { super.setBigramAndGetFrequency( word1, word2, new ForgettingCurveParams(frequency, now, last)); - // TODO: optimize - mPendingWrites.add(new Bigram(word1, word2)); + } + synchronized(mPendingWritesLock) { + mBigramList.addBigram(word1, word2); } cursor.moveToNext(); } @@ -339,14 +311,15 @@ public class UserHistoryDictionary extends ExpandableDictionary { * the in-memory trie. */ private static class UpdateDbTask extends AsyncTask { - private final HashSet mMap; + private final UserHistoryDictionaryBigramList mBigramList; private final DatabaseHelper mDbHelper; private final String mLocale; private final UserHistoryDictionary mUserHistoryDictionary; - public UpdateDbTask(DatabaseHelper openHelper, HashSet pendingWrites, + public UpdateDbTask( + DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites, String locale, UserHistoryDictionary dict) { - mMap = pendingWrites; + mBigramList = pendingWrites; mLocale = locale; mDbHelper = openHelper; mUserHistoryDictionary = dict; @@ -401,67 +374,71 @@ public class UserHistoryDictionary extends ExpandableDictionary { return null; } db.execSQL("PRAGMA foreign_keys = ON;"); + final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams; + // Write all the entries to the db - final Iterator iterator = mMap.iterator(); - while (iterator.hasNext()) { - // TODO: this process of making a text search for each pair each time - // is terribly inefficient. Optimize this. - final Bigram bi = iterator.next(); + for (String word1 : mBigramList.keySet()) { + for (String word2 : mBigramList.getBigrams(word1)) { + // TODO: this process of making a text search for each pair each time + // is terribly inefficient. Optimize this. + // find pair id + Cursor c = null; + try { + if (null != word1) { + c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, + MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " + + MAIN_COLUMN_LOCALE + "=?", + new String[] { word1, word2, mLocale }, null, null, + null); + } else { + c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, + MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + + "=? AND " + MAIN_COLUMN_LOCALE + "=?", + new String[] { word2, mLocale }, null, null, null); + } - // find pair id - Cursor c = null; - try { - if (null != bi.mWord1) { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " - + MAIN_COLUMN_LOCALE + "=?", - new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, - null); - } else { - c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, - MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND " - + MAIN_COLUMN_LOCALE + "=?", - new String[] { bi.mWord2, mLocale }, null, null, null); - } - - final int pairId; - if (c.moveToFirst()) { - // existing pair - pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); - db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", - new String[] { Integer.toString(pairId) }); - } else { - // new pair - Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, - getContentValues(bi.mWord1, bi.mWord2, mLocale)); - pairId = pairIdLong.intValue(); - } - // insert new frequency - final int freq; - if (bi.mWord1 == null) { - freq = FREQUENCY_FOR_TYPED; - } else { - final NextWord nw = mUserHistoryDictionary.getBigramWord( - bi.mWord1, bi.mWord2); - if (nw != null) { - final int tempFreq = nw.getFcValue(); - // TODO: Check whether the word is valid or not - if (UserHistoryForgettingCurveUtils.needsToSave( - (byte)tempFreq, false)) { - freq = tempFreq; + final int pairId; + if (c.moveToFirst()) { + // existing pair + pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); + db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", + new String[] { Integer.toString(pairId) }); + } else { + // new pair + Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, + getContentValues(word1, word2, mLocale)); + pairId = pairIdLong.intValue(); + } + // insert new frequency + final int freq; + if (word1 == null) { + freq = FREQUENCY_FOR_TYPED; + } else { + final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2); + if (nw != null) { + final int tempFreq = nw.getFcValue(); + // TODO: Check whether the word is valid or not + if (UserHistoryForgettingCurveUtils.needsToSave( + (byte)tempFreq, false, addLevel0Bigram)) { + freq = tempFreq; + } else { + freq = -1; + } } else { freq = -1; } - } else { - freq = -1; } - } - if (freq > 0) { - db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq)); - } - } finally { - if (c != null) { - c.close(); + if (freq > 0) { + if (DBG_SAVE_RESTORE) { + Log.d(TAG, "--- Save user history: " + word1 + ", " + word2); + } + db.insert(FREQ_TABLE_NAME, null, + getFrequencyContentValues(pairId, freq)); + } + } finally { + if (c != null) { + c.close(); + } } } } diff --git a/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java new file mode 100644 index 000000000..409f921ff --- /dev/null +++ b/java/src/com/android/inputmethod/latin/UserHistoryDictionaryBigramList.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.inputmethod.latin; + +import android.util.Log; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +/** + * A store of bigrams which will be updated when the user history dictionary is closed + * All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale + * bigrams when we write to the SQL DB. + */ +public class UserHistoryDictionaryBigramList { + private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName(); + private static final HashSet EMPTY_STRING_SET = new HashSet(); + private final HashMap> mBigramMap = + new HashMap>(); + private int mSize = 0; + + public void evictAll() { + mSize = 0; + mBigramMap.clear(); + } + + public void addBigram(String word1, String word2) { + if (UserHistoryDictionary.DBG_SAVE_RESTORE) { + Log.d(TAG, "--- add bigram: " + word1 + ", " + word2); + } + final HashSet set; + if (mBigramMap.containsKey(word1)) { + set = mBigramMap.get(word1); + } else { + set = new HashSet(); + mBigramMap.put(word1, set); + } + if (!set.contains(word2)) { + ++mSize; + set.add(word2); + } + } + + public int size() { + return mSize; + } + + public boolean isEmpty() { + return mBigramMap.isEmpty(); + } + + public Set keySet() { + return mBigramMap.keySet(); + } + + public HashSet getBigrams(String word1) { + if (!mBigramMap.containsKey(word1)) { + return EMPTY_STRING_SET; + } else { + return mBigramMap.get(word1); + } + } + + public boolean removeBigram(String word1, String word2) { + final HashSet set = getBigrams(word1); + if (set.isEmpty()) { + return false; + } + if (set.contains(word2)) { + set.remove(word2); + --mSize; + return true; + } + return false; + } +} diff --git a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java index f30fee23e..feb1d0029 100644 --- a/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java +++ b/java/src/com/android/inputmethod/latin/UserHistoryForgettingCurveUtils.java @@ -162,10 +162,15 @@ public class UserHistoryForgettingCurveUtils { // TODO: isValid should be false for a word whose frequency is 0, // or that is not in the dictionary. - public static boolean needsToSave(byte fc, boolean isValid) { + /** + * Check wheather we should save the bigram to the SQL DB or not + */ + public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) { int level = fcToLevel(fc); - if (isValid && level == 0) { - return false; + if (level == 0) { + if (isValid || !addLevel0Bigram) { + return false; + } } final int elapsedTime = fcToElapsedTime(fc); return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);