Merge "Optimize the store of bigram list" into jb-dev
This commit is contained in:
commit
bd40b94f96
3 changed files with 179 additions and 106 deletions
|
@ -30,8 +30,6 @@ import android.util.Log;
|
||||||
import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
|
import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Locally gathers stats about the words user types and various other signals like auto-correction
|
* Locally gathers stats about the words user types and various other signals like auto-correction
|
||||||
|
@ -39,6 +37,7 @@ import java.util.Iterator;
|
||||||
*/
|
*/
|
||||||
public class UserHistoryDictionary extends ExpandableDictionary {
|
public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
private static final String TAG = "UserHistoryDictionary";
|
private static final String TAG = "UserHistoryDictionary";
|
||||||
|
public static final boolean DBG_SAVE_RESTORE = false;
|
||||||
|
|
||||||
/** Any pair being typed or picked */
|
/** Any pair being typed or picked */
|
||||||
private static final int FREQUENCY_FOR_TYPED = 2;
|
private static final int FREQUENCY_FOR_TYPED = 2;
|
||||||
|
@ -78,7 +77,8 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
/** Locale for which this auto dictionary is storing words */
|
/** Locale for which this auto dictionary is storing words */
|
||||||
private String mLocale;
|
private String mLocale;
|
||||||
|
|
||||||
private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
|
private UserHistoryDictionaryBigramList mBigramList =
|
||||||
|
new UserHistoryDictionaryBigramList();
|
||||||
private final Object mPendingWritesLock = new Object();
|
private final Object mPendingWritesLock = new Object();
|
||||||
private static volatile boolean sUpdatingDB = false;
|
private static volatile boolean sUpdatingDB = false;
|
||||||
private final SharedPreferences mPrefs;
|
private final SharedPreferences mPrefs;
|
||||||
|
@ -99,35 +99,6 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
|
|
||||||
private static DatabaseHelper sOpenHelper = null;
|
private static DatabaseHelper sOpenHelper = null;
|
||||||
|
|
||||||
private static class Bigram {
|
|
||||||
public final String mWord1;
|
|
||||||
public final String mWord2;
|
|
||||||
|
|
||||||
Bigram(String word1, String word2) {
|
|
||||||
this.mWord1 = word1;
|
|
||||||
this.mWord2 = word2;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object bigram) {
|
|
||||||
if (!(bigram instanceof Bigram)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
final Bigram bigram2 = (Bigram) bigram;
|
|
||||||
final boolean eq1 =
|
|
||||||
mWord1 == null ? bigram2.mWord1 == null : mWord1.equals(bigram2.mWord1);
|
|
||||||
if (!eq1) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return mWord2 == null ? bigram2.mWord2 == null : mWord2.equals(bigram2.mWord2);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return (mWord1 + " " + mWord2).hashCode();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDatabaseMax(int maxHistoryBigram) {
|
public void setDatabaseMax(int maxHistoryBigram) {
|
||||||
sMaxHistoryBigrams = maxHistoryBigram;
|
sMaxHistoryBigrams = maxHistoryBigram;
|
||||||
}
|
}
|
||||||
|
@ -190,21 +161,18 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
|
freq = super.setBigramAndGetFrequency(word1, word2, new ForgettingCurveParams());
|
||||||
}
|
}
|
||||||
synchronized (mPendingWritesLock) {
|
synchronized (mPendingWritesLock) {
|
||||||
final Bigram bi = new Bigram(word1, word2);
|
mBigramList.addBigram(word1, word2);
|
||||||
if (!mPendingWrites.contains(bi)) {
|
|
||||||
mPendingWrites.add(bi);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean cancelAddingUserHistory(String word1, String word2) {
|
public boolean cancelAddingUserHistory(String word1, String word2) {
|
||||||
final Bigram bi = new Bigram(word1, word2);
|
synchronized (mPendingWritesLock) {
|
||||||
if (mPendingWrites.contains(bi)) {
|
if (mBigramList.removeBigram(word1, word2)) {
|
||||||
mPendingWrites.remove(bi);
|
|
||||||
return super.removeBigram(word1, word2);
|
return super.removeBigram(word1, word2);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,11 +182,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
private void flushPendingWrites() {
|
private void flushPendingWrites() {
|
||||||
synchronized (mPendingWritesLock) {
|
synchronized (mPendingWritesLock) {
|
||||||
// Nothing pending? Return
|
// Nothing pending? Return
|
||||||
if (mPendingWrites.isEmpty()) return;
|
if (mBigramList.isEmpty()) return;
|
||||||
// Create a background thread to write the pending entries
|
// Create a background thread to write the pending entries
|
||||||
new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale, this).execute();
|
new UpdateDbTask(sOpenHelper, mBigramList, mLocale, this).execute();
|
||||||
// Create a new map for writing new entries into while the old one is written to db
|
// Create a new map for writing new entries into while the old one is written to db
|
||||||
mPendingWrites = new HashSet<Bigram>();
|
mBigramList = new UserHistoryDictionaryBigramList();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,6 +219,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
final String word1 = cursor.getString(word1Index);
|
final String word1 = cursor.getString(word1Index);
|
||||||
final String word2 = cursor.getString(word2Index);
|
final String word2 = cursor.getString(word2Index);
|
||||||
final int frequency = cursor.getInt(frequencyIndex);
|
final int frequency = cursor.getInt(frequencyIndex);
|
||||||
|
if (DBG_SAVE_RESTORE) {
|
||||||
|
Log.d(TAG, "--- Load user history: " + word1 + ", " + word2);
|
||||||
|
}
|
||||||
// Safeguard against adding really long words. Stack may overflow due
|
// Safeguard against adding really long words. Stack may overflow due
|
||||||
// to recursive lookup
|
// to recursive lookup
|
||||||
if (null == word1) {
|
if (null == word1) {
|
||||||
|
@ -259,8 +230,9 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
&& word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
|
&& word2.length() < BinaryDictionary.MAX_WORD_LENGTH) {
|
||||||
super.setBigramAndGetFrequency(
|
super.setBigramAndGetFrequency(
|
||||||
word1, word2, new ForgettingCurveParams(frequency, now, last));
|
word1, word2, new ForgettingCurveParams(frequency, now, last));
|
||||||
// TODO: optimize
|
}
|
||||||
mPendingWrites.add(new Bigram(word1, word2));
|
synchronized(mPendingWritesLock) {
|
||||||
|
mBigramList.addBigram(word1, word2);
|
||||||
}
|
}
|
||||||
cursor.moveToNext();
|
cursor.moveToNext();
|
||||||
}
|
}
|
||||||
|
@ -339,14 +311,15 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
* the in-memory trie.
|
* the in-memory trie.
|
||||||
*/
|
*/
|
||||||
private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
|
private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
|
||||||
private final HashSet<Bigram> mMap;
|
private final UserHistoryDictionaryBigramList mBigramList;
|
||||||
private final DatabaseHelper mDbHelper;
|
private final DatabaseHelper mDbHelper;
|
||||||
private final String mLocale;
|
private final String mLocale;
|
||||||
private final UserHistoryDictionary mUserHistoryDictionary;
|
private final UserHistoryDictionary mUserHistoryDictionary;
|
||||||
|
|
||||||
public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites,
|
public UpdateDbTask(
|
||||||
|
DatabaseHelper openHelper, UserHistoryDictionaryBigramList pendingWrites,
|
||||||
String locale, UserHistoryDictionary dict) {
|
String locale, UserHistoryDictionary dict) {
|
||||||
mMap = pendingWrites;
|
mBigramList = pendingWrites;
|
||||||
mLocale = locale;
|
mLocale = locale;
|
||||||
mDbHelper = openHelper;
|
mDbHelper = openHelper;
|
||||||
mUserHistoryDictionary = dict;
|
mUserHistoryDictionary = dict;
|
||||||
|
@ -401,27 +374,27 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
db.execSQL("PRAGMA foreign_keys = ON;");
|
db.execSQL("PRAGMA foreign_keys = ON;");
|
||||||
|
final boolean addLevel0Bigram = mBigramList.size() <= sMaxHistoryBigrams;
|
||||||
|
|
||||||
// Write all the entries to the db
|
// Write all the entries to the db
|
||||||
final Iterator<Bigram> iterator = mMap.iterator();
|
for (String word1 : mBigramList.keySet()) {
|
||||||
while (iterator.hasNext()) {
|
for (String word2 : mBigramList.getBigrams(word1)) {
|
||||||
// TODO: this process of making a text search for each pair each time
|
// TODO: this process of making a text search for each pair each time
|
||||||
// is terribly inefficient. Optimize this.
|
// is terribly inefficient. Optimize this.
|
||||||
final Bigram bi = iterator.next();
|
|
||||||
|
|
||||||
// find pair id
|
// find pair id
|
||||||
Cursor c = null;
|
Cursor c = null;
|
||||||
try {
|
try {
|
||||||
if (null != bi.mWord1) {
|
if (null != word1) {
|
||||||
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
|
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
|
||||||
MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
|
MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
|
||||||
+ MAIN_COLUMN_LOCALE + "=?",
|
+ MAIN_COLUMN_LOCALE + "=?",
|
||||||
new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null,
|
new String[] { word1, word2, mLocale }, null, null,
|
||||||
null);
|
null);
|
||||||
} else {
|
} else {
|
||||||
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
|
c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
|
||||||
MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2 + "=? AND "
|
MAIN_COLUMN_WORD1 + " IS NULL AND " + MAIN_COLUMN_WORD2
|
||||||
+ MAIN_COLUMN_LOCALE + "=?",
|
+ "=? AND " + MAIN_COLUMN_LOCALE + "=?",
|
||||||
new String[] { bi.mWord2, mLocale }, null, null, null);
|
new String[] { word2, mLocale }, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int pairId;
|
final int pairId;
|
||||||
|
@ -433,21 +406,20 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
} else {
|
} else {
|
||||||
// new pair
|
// new pair
|
||||||
Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
|
Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
|
||||||
getContentValues(bi.mWord1, bi.mWord2, mLocale));
|
getContentValues(word1, word2, mLocale));
|
||||||
pairId = pairIdLong.intValue();
|
pairId = pairIdLong.intValue();
|
||||||
}
|
}
|
||||||
// insert new frequency
|
// insert new frequency
|
||||||
final int freq;
|
final int freq;
|
||||||
if (bi.mWord1 == null) {
|
if (word1 == null) {
|
||||||
freq = FREQUENCY_FOR_TYPED;
|
freq = FREQUENCY_FOR_TYPED;
|
||||||
} else {
|
} else {
|
||||||
final NextWord nw = mUserHistoryDictionary.getBigramWord(
|
final NextWord nw = mUserHistoryDictionary.getBigramWord(word1, word2);
|
||||||
bi.mWord1, bi.mWord2);
|
|
||||||
if (nw != null) {
|
if (nw != null) {
|
||||||
final int tempFreq = nw.getFcValue();
|
final int tempFreq = nw.getFcValue();
|
||||||
// TODO: Check whether the word is valid or not
|
// TODO: Check whether the word is valid or not
|
||||||
if (UserHistoryForgettingCurveUtils.needsToSave(
|
if (UserHistoryForgettingCurveUtils.needsToSave(
|
||||||
(byte)tempFreq, false)) {
|
(byte)tempFreq, false, addLevel0Bigram)) {
|
||||||
freq = tempFreq;
|
freq = tempFreq;
|
||||||
} else {
|
} else {
|
||||||
freq = -1;
|
freq = -1;
|
||||||
|
@ -457,7 +429,11 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (freq > 0) {
|
if (freq > 0) {
|
||||||
db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, freq));
|
if (DBG_SAVE_RESTORE) {
|
||||||
|
Log.d(TAG, "--- Save user history: " + word1 + ", " + word2);
|
||||||
|
}
|
||||||
|
db.insert(FREQ_TABLE_NAME, null,
|
||||||
|
getFrequencyContentValues(pairId, freq));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (c != null) {
|
if (c != null) {
|
||||||
|
@ -465,6 +441,7 @@ public class UserHistoryDictionary extends ExpandableDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
checkPruneData(db);
|
checkPruneData(db);
|
||||||
sUpdatingDB = false;
|
sUpdatingDB = false;
|
||||||
|
|
|
@ -0,0 +1,91 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin;
|
||||||
|
|
||||||
|
import android.util.Log;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A store of bigrams which will be updated when the user history dictionary is closed
|
||||||
|
* All bigrams including stale ones in SQL DB should be stored in this class to avoid adding stale
|
||||||
|
* bigrams when we write to the SQL DB.
|
||||||
|
*/
|
||||||
|
public class UserHistoryDictionaryBigramList {
|
||||||
|
private static final String TAG = UserHistoryDictionaryBigramList.class.getSimpleName();
|
||||||
|
private static final HashSet<String> EMPTY_STRING_SET = new HashSet<String>();
|
||||||
|
private final HashMap<String, HashSet<String>> mBigramMap =
|
||||||
|
new HashMap<String, HashSet<String>>();
|
||||||
|
private int mSize = 0;
|
||||||
|
|
||||||
|
public void evictAll() {
|
||||||
|
mSize = 0;
|
||||||
|
mBigramMap.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addBigram(String word1, String word2) {
|
||||||
|
if (UserHistoryDictionary.DBG_SAVE_RESTORE) {
|
||||||
|
Log.d(TAG, "--- add bigram: " + word1 + ", " + word2);
|
||||||
|
}
|
||||||
|
final HashSet<String> set;
|
||||||
|
if (mBigramMap.containsKey(word1)) {
|
||||||
|
set = mBigramMap.get(word1);
|
||||||
|
} else {
|
||||||
|
set = new HashSet<String>();
|
||||||
|
mBigramMap.put(word1, set);
|
||||||
|
}
|
||||||
|
if (!set.contains(word2)) {
|
||||||
|
++mSize;
|
||||||
|
set.add(word2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return mSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return mBigramMap.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> keySet() {
|
||||||
|
return mBigramMap.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
public HashSet<String> getBigrams(String word1) {
|
||||||
|
if (!mBigramMap.containsKey(word1)) {
|
||||||
|
return EMPTY_STRING_SET;
|
||||||
|
} else {
|
||||||
|
return mBigramMap.get(word1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean removeBigram(String word1, String word2) {
|
||||||
|
final HashSet<String> set = getBigrams(word1);
|
||||||
|
if (set.isEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (set.contains(word2)) {
|
||||||
|
set.remove(word2);
|
||||||
|
--mSize;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -162,11 +162,16 @@ public class UserHistoryForgettingCurveUtils {
|
||||||
|
|
||||||
// TODO: isValid should be false for a word whose frequency is 0,
|
// TODO: isValid should be false for a word whose frequency is 0,
|
||||||
// or that is not in the dictionary.
|
// or that is not in the dictionary.
|
||||||
public static boolean needsToSave(byte fc, boolean isValid) {
|
/**
|
||||||
|
* Check wheather we should save the bigram to the SQL DB or not
|
||||||
|
*/
|
||||||
|
public static boolean needsToSave(byte fc, boolean isValid, boolean addLevel0Bigram) {
|
||||||
int level = fcToLevel(fc);
|
int level = fcToLevel(fc);
|
||||||
if (isValid && level == 0) {
|
if (level == 0) {
|
||||||
|
if (isValid || !addLevel0Bigram) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
final int elapsedTime = fcToElapsedTime(fc);
|
final int elapsedTime = fcToElapsedTime(fc);
|
||||||
return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);
|
return (elapsedTime < ELAPSED_TIME_MAX - 1 || level > 0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue