Optimize the store of bigram list

Bug: 4192129

Change-Id: Idcc62e4f9696b56b1d7013891b2da37b1784423e
This commit is contained in:
Satoshi Kataoka 2012-05-28 19:08:34 +09:00
parent e09520290f
commit bc56885062
3 changed files with 179 additions and 106 deletions

View file

@ -30,8 +30,6 @@ import android.util.Log;
import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
/** /**
* Locally gathers stats about the words user types and various other signals like auto-correction * Locally gathers stats about the words user types and various other signals like auto-correction
@ -39,6 +37,7 @@ import java.util.Iterator;
*/ */
public class UserHistoryDictionary extends ExpandableDictionary { public class UserHistoryDictionary extends ExpandableDictionary {
private static final String TAG = "UserHistoryDictionary"; private static final String TAG = "UserHistoryDictionary";
public static final boolean DBG_SAVE_RESTORE = false;
/** Any pair being typed or picked */ /** Any pair being typed or picked */
private static final int FREQUENCY_FOR_TYPED = 2; private static final int FREQUENCY_FOR_TYPED = 2;
@ -78,7 +77,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
/** Locale for which this auto dictionary is storing words */ /** Locale for which this auto dictionary is storing words */
private String mLocale; private String mLocale;
private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>(); private UserHistoryDictionaryBigramList mBigramList =
new UserHistoryDictionaryBigramList();
private final Object mPendingWritesLock = new Object(); private final Object mPendingWritesLock = new Object();
private static volatile boolean sUpdatingDB = false; private static volatile boolean sUpdatingDB = false;
private final SharedPreferences mPrefs; private final SharedPreferences mPrefs;
@ -99,35 +99,6 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private static DatabaseHelper sOpenHelper = null; private static DatabaseHelper sOpenHelper = null;
private static class Bigram {
public final String mWord1;
public final String mWord2;
Bigram(String word1, String word2) {
this.mWord1 = word1;
this.mWord2 = word2;
}
@Override
public boolean equals(Object bigram) {
if (!(bigram instanceof Bigram)) {
return false;
}
final Bigram bigram2 = (Bigram) bigram;
final boolean eq1 =
mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1);
if (!eq1) {
return false;
}
return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2);
}
@Override
public int hashCode() {
return (mWord1 + " " + mWord2).hashCode();
}
}
public void setDatabaseMax(int maxHistoryBigram) { public void setDatabaseMax(int maxHistoryBigram) {
sMaxHistoryBigrams = maxHistoryBigram; sMaxHistoryBigrams = maxHistoryBigram;
} }
@ -190,20 +161,17 @@ public class UserHistoryDictionary extends ExpandableDictionary {
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams()); freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
} }
synchronized (mPendingWritesLock) { synchronized (mPendingWritesLock) {
final Bigram bi = new Bigram(word1, word2); mBigramList.addBigram(word1, word2);
if (!mPendingWrites.contains(bi)) {
mPendingWrites.add(bi);
}
} }
return freq; return freq;
} }
public boolean cancelAddingUserHistory(String word1, String word2) { public boolean cancelAddingUserHistory(String word1, String word2) {
final Bigram bi = new Bigram(word1, word2); synchronized (mPendingWritesLock) {
if (mPendingWrites.contains(bi)) { if (mBigramList.removeBigram(word1, word2)) {
mPendingWrites.remove(bi); return super.removeBigram(word1, word2);
return super.removeBigram(word1, word2); }
} }
return false; return false;
} }
@ -214,11 +182,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
private void flushPendingWrites() { private void flushPendingWrites() {
synchronized (mPendingWritesLock) { synchronized (mPendingWritesLock) {
// Nothing pending? Return // Nothing pending? Return
if (mPendingWrites.isEmpty()) return; if (mBigramList.isEmpty()) return;
// Create a background thread to write the pending entries // Create a background thread to write the pending entries
new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute(); new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute();
// Create a new map for writing new entries into while the old one is written to db // Create a new map for writing new entries into while the old one is written to db
mPendingWrites = new HashSet<Bigram>(); mBigramList = new UserHistoryDictionaryBigramList();
} }
} }
@ -251,6 +219,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
final String word1 = cursor.getString(word1Index); final String word1 = cursor.getString(word1Index);
final String word2 = cursor.getString(word2Index); final String word2 = cursor.getString(word2Index);
final int frequency = cursor.getInt(frequencyIndex); final int frequency = cursor.getInt(frequencyIndex);
if (DBG_SAVE_RESTORE) {
Log.d(TAG, "--- Load user history: " + word1 + ", " + word2);
}
// Safeguard against adding really long words. Stack may overflow due // Safeguard against adding really long words. Stack may overflow due
// to recursive lookup // to recursive lookup
if (null == word1) { if (null == word1) {
@ -259,8 +230,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
&& word2.length() < BinaryDictionary.MAX_WORD_LENGTH) { && word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
super.setBigramAndGetFrequency( super.setBigramAndGetFrequency(
word1, word2, new ForgettingCurveParams(frequency, now, last)); word1, word2, new ForgettingCurveParams(frequency, now, last));
// TODO: optimize }
mPendingWrites.add(new Bigram(word1, word2)); synchronized(mPendingWritesLock) {
mBigramList.addBigram(word1, word2);
} }
cursor.moveToNext(); cursor.moveToNext();
} }
@ -339,14 +311,15 @@ public class UserHistoryDictionary extends ExpandableDictionary {
* the in-memory trie. * the in-memory trie.
*/ */
private static class UpdateDbTask extends AsyncTask<Void, Void, Void> { private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
private final HashSet<Bigram> mMap; private final UserHistoryDictionaryBigramList mBigramList;
private final DatabaseHelper mDbHelper; private final DatabaseHelper mDbHelper;
private final String mLocale; private final String mLocale;
private final UserHistoryDictionary mUserHistoryDictionary; private final UserHistoryDictionary mUserHistoryDictionary;
public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites, public UpdateDbTask(
DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites,
String locale, UserHistoryDictionary dict) { String locale, UserHistoryDictionary dict) {
mMap = pendingWrites; mBigramList = pendingWrites;
mLocale = locale; mLocale = locale;
mDbHelper = openHelper; mDbHelper = openHelper;
mUserHistoryDictionary = dict; mUserHistoryDictionary = dict;
@ -401,67 +374,71 @@ public class UserHistoryDictionary extends ExpandableDictionary {
return null; return null;
} }
db.execSQL("PRAGMA foreign_keys = ON;"); db.execSQL("PRAGMA foreign_keys = ON;");
final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams;
// Write all the entries to the db // Write all the entries to the db
final Iterator<Bigram> iterator = mMap.iterator(); for (String word1 : mBigramList.keySet()) {
while (iterator.hasNext()) { for (String word2 : mBigramList.getBigrams(word1)) {
// TODO: this process of making a text search for each pair each time // TODO: this process of making a text search for each pair each time
// is terribly inefficient. Optimize this. // is terribly inefficient. Optimize this.
final Bigram bi = iterator.next(); // find pair id
Cursor c = null;
try {
if (null != word1) {
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
+ MAIN_COLUMN_LOCALE + "=?",
new String[] { word1, word2, mLocale }, null, null,
null);
} else {
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2
+ "=? AND " + MAIN_COLUMN_LOCALE + "=?",
new String[] { word2, mLocale }, null, null, null);
}
// find pair id final int pairId;
Cursor c = null; if (c.moveToFirst()) {
try { // existing pair
if (null != bi.mWord1) { pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " new String[] { Integer.toString(pairId) });
+ MAIN_COLUMN_LOCALE + "=?", } else {
new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, // new pair
null); Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
} else { getContentValues(word1, word2, mLocale));
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, pairId = pairIdLong.intValue();
MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND " }
+ MAIN_COLUMN_LOCALE + "=?", // insert new frequency
new String[] { bi.mWord2, mLocale }, null, null, null); final int freq;
} if (word1 == null) {
freq = FREQUENCY_FOR_TYPED;
final int pairId; } else {
if (c.moveToFirst()) { final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
// existing pair if (nw != null) {
pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); final int tempFreq = nw.getFcValue();
db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", // TODO: Check whether the word is valid or not
new String[] { Integer.toString(pairId) }); if (UserHistoryForgettingCurveUtils.needsToSave(
} else { (byte)tempFreq, false, addLevel0Bigram)) {
// new pair freq = tempFreq;
Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, } else {
getContentValues(bi.mWord1, bi.mWord2, mLocale)); freq = -1;
pairId = pairIdLong.intValue(); }
}
// insert new frequency
final int freq;
if (bi.mWord1 == null) {
freq = FREQUENCY_FOR_TYPED;
} else {
final NextWord nw = mUserHistoryDictionary.getBigramWord(
bi.mWord1, bi.mWord2);
if (nw != null) {
final int tempFreq = nw.getFcValue();
// TODO: Check whether the word is valid or not
if (UserHistoryForgettingCurveUtils.needsToSave(
(byte)tempFreq, false)) {
freq = tempFreq;
} else { } else {
freq = -1; freq = -1;
} }
} else {
freq = -1;
} }
} if (freq > 0) {
if (freq > 0) { if (DBG_SAVE_RESTORE) {
db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq)); Log.d(TAG, "--- Save user history: " + word1 + ", " + word2);
} }
} finally { db.insert(FREQ_TABLE_NAME, null,
if (c != null) { getFrequencyContentValues(pairId, freq));
c.close(); }
} finally {
if (c != null) {
c.close();
}
} }
} }
} }

View file

@ -0,0 +1,91 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin;
import android.util.Log;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
/**
* A store of bigrams which will be updated when the user history dictionary is closed
* All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
* bigrams when we write to the SQL DB.
*/
public class UserHistoryDictionaryBigramList {
private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
private static final HashSet<String> EMPTY_STRING_SET = new HashSet<String>();
private final HashMap<String, HashSet<String>> mBigramMap =
new HashMap<String, HashSet<String>>();
private int mSize = 0;
public void evictAll() {
mSize = 0;
mBigramMap.clear();
}
public void addBigram(String word1, String word2) {
if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
Log.d(TAG, "--- add bigram: " + word1 + ", " + word2);
}
final HashSet<String> set;
if (mBigramMap.containsKey(word1)) {
set = mBigramMap.get(word1);
} else {
set = new HashSet<String>();
mBigramMap.put(word1, set);
}
if (!set.contains(word2)) {
++mSize;
set.add(word2);
}
}
public int size() {
return mSize;
}
public boolean isEmpty() {
return mBigramMap.isEmpty();
}
public Set<String> keySet() {
return mBigramMap.keySet();
}
public HashSet<String> getBigrams(String word1) {
if (!mBigramMap.containsKey(word1)) {
return EMPTY_STRING_SET;
} else {
return mBigramMap.get(word1);
}
}
public boolean removeBigram(String word1, String word2) {
final HashSet<String> set = getBigrams(word1);
if (set.isEmpty()) {
return false;
}
if (set.contains(word2)) {
set.remove(word2);
--mSize;
return true;
}
return false;
}
}

View file

@ -162,10 +162,15 @@ public class UserHistoryForgettingCurveUtils {
// TODO: isValid should be false for a word whose frequency is 0, // TODO: isValid should be false for a word whose frequency is 0,
// or that is not in the dictionary. // or that is not in the dictionary.
public static boolean needsToSave(byte fc, boolean isValid) { /**
* Check wheather we should save the bigram to the SQL DB or not
*/
public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) {
int level = fcToLevel(fc); int level = fcToLevel(fc);
if (isValid && level == 0) { if (level == 0) {
return false; if (isValid || !addLevel0Bigram) {
return false;
}
} }
final int elapsedTime = fcToElapsedTime(fc); final int elapsedTime = fcToElapsedTime(fc);
return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0); return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);