From ac093396ba20437a240dbcf4e2b35f9d5355bd6c Mon Sep 17 00:00:00 2001 From: Jae Yong Sung Date: Tue, 3 Aug 2010 18:28:38 -0700 Subject: [PATCH] -UserBigram -UnitTest for UserBigram -Changes for number of bigrams to load Change-Id: I2c6fbe6194d34112ccc52c7e199461d2350e8516 --- .../inputmethod/latin/AutoDictionary.java | 10 +- .../inputmethod/latin/BinaryDictionary.java | 10 +- .../inputmethod/latin/ContactsDictionary.java | 4 +- .../latin/ExpandableDictionary.java | 83 ++-- .../android/inputmethod/latin/LatinIME.java | 20 +- .../android/inputmethod/latin/Suggest.java | 14 +- .../latin/UserBigramDictionary.java | 402 ++++++++++++++++++ .../latin/{tests => }/SuggestHelper.java | 97 ++++- .../{tests => }/SuggestPerformanceTests.java | 7 +- .../latin/{tests => }/SuggestTests.java | 4 +- .../inputmethod/latin/UserBigramTests.java | 100 +++++ 11 files changed, 676 insertions(+), 75 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/UserBigramDictionary.java rename tests/src/com/android/inputmethod/latin/{tests => }/SuggestHelper.java (68%) rename tests/src/com/android/inputmethod/latin/{tests => }/SuggestPerformanceTests.java (97%) rename tests/src/com/android/inputmethod/latin/{tests => }/SuggestTests.java (98%) create mode 100644 tests/src/com/android/inputmethod/latin/UserBigramTests.java diff --git a/java/src/com/android/inputmethod/latin/AutoDictionary.java b/java/src/com/android/inputmethod/latin/AutoDictionary.java index 94331d3f2..4fbb5b012 100644 --- a/java/src/com/android/inputmethod/latin/AutoDictionary.java +++ b/java/src/com/android/inputmethod/latin/AutoDictionary.java @@ -83,14 +83,14 @@ public class AutoDictionary extends ExpandableDictionary { sDictProjectionMap.put(COLUMN_LOCALE, COLUMN_LOCALE); } - private static DatabaseHelper mOpenHelper = null; + private static DatabaseHelper sOpenHelper = null; public AutoDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { super(context, dicTypeId); mIme = ime; mLocale = locale; - if (mOpenHelper == null) { - mOpenHelper = new DatabaseHelper(getContext()); + if (sOpenHelper == null) { + sOpenHelper = new DatabaseHelper(getContext()); } if (mLocale != null && mLocale.length() > 1) { loadDictionary(); @@ -169,7 +169,7 @@ public class AutoDictionary extends ExpandableDictionary { // Nothing pending? Return if (mPendingWrites.isEmpty()) return; // Create a background thread to write the pending entries - new UpdateDbTask(getContext(), mOpenHelper, mPendingWrites, mLocale).execute(); + new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute(); // Create a new map for writing new entries into while the old one is written to db mPendingWrites = new HashMap(); } @@ -209,7 +209,7 @@ public class AutoDictionary extends ExpandableDictionary { qb.setProjectionMap(sDictProjectionMap); // Get the database and run the query - SQLiteDatabase db = mOpenHelper.getReadableDatabase(); + SQLiteDatabase db = sOpenHelper.getReadableDatabase(); Cursor c = qb.query(db, null, selection, selectionArgs, null, null, DEFAULT_SORT_ORDER); return c; diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index e2c0c4ccc..69c2b94f2 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -42,8 +42,8 @@ public class BinaryDictionary extends Dictionary { private static final String TAG = "BinaryDictionary"; private static final int MAX_ALTERNATIVES = 16; - private static final int MAX_WORDS = 16; - private static final int MAX_BIGRAMS = 255; // TODO Probably don't need all 255 + private static final int MAX_WORDS = 18; + private static final int MAX_BIGRAMS = 60; private static final int TYPED_LETTER_MULTIPLIER = 2; private static final boolean ENABLE_MISSED_CHARACTERS = true; @@ -140,8 +140,10 @@ public class BinaryDictionary extends Dictionary { Log.w(TAG, "No available memory for binary dictionary"); } finally { try { - for (int i = 0;i < is.length; i++) { - is[i].close(); + if (is != null) { + for (int i = 0; i < is.length; i++) { + is[i].close(); + } } } catch (IOException e) { Log.w(TAG, "Failed to close input stream"); diff --git a/java/src/com/android/inputmethod/latin/ContactsDictionary.java b/java/src/com/android/inputmethod/latin/ContactsDictionary.java index 756782887..ab75868cf 100644 --- a/java/src/com/android/inputmethod/latin/ContactsDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsDictionary.java @@ -125,8 +125,8 @@ public class ContactsDictionary extends ExpandableDictionary { super.addWord(word, FREQUENCY_FOR_CONTACTS); if (!TextUtils.isEmpty(prevWord)) { // TODO Do not add email address - super.addBigrams(prevWord, word, - FREQUENCY_FOR_CONTACTS_BIGRAM); + // Not so critical + super.setBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM); } prevWord = word; } diff --git a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java index 53f9ed8c8..e954c0818 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableDictionary.java @@ -20,8 +20,6 @@ import java.util.LinkedList; import android.content.Context; import android.os.AsyncTask; -import android.os.SystemClock; -import android.util.Log; /** * Base class for an in-memory dictionary that can grow dynamically and can @@ -325,12 +323,21 @@ public class ExpandableDictionary extends Dictionary { } } + protected int setBigram(String word1, String word2, int frequency) { + return addOrSetBigram(word1, word2, frequency, false); + } + + protected int addBigram(String word1, String word2, int frequency) { + return addOrSetBigram(word1, word2, frequency, true); + } + /** * Adds bigrams to the in-memory trie structure that is being used to retrieve any word - * @param addFrequency adding frequency of the pair + * @param frequency frequency for this bigrams + * @param addFrequency if true, it adds to current frequency * @return returns the final frequency */ - protected int addBigrams(String word1, String word2, int addFrequency) { + private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) { Node firstWord = searchWord(mRoots, word1, 0, null); Node secondWord = searchWord(mRoots, word2, 0, null); LinkedList bigram = firstWord.ngrams; @@ -340,14 +347,18 @@ public class ExpandableDictionary extends Dictionary { } else { for (NextWord nw : bigram) { if (nw.word == secondWord) { - nw.frequency += addFrequency; + if (addFrequency) { + nw.frequency += frequency; + } else { + nw.frequency = frequency; + } return nw.frequency; } } } - NextWord nw = new NextWord(secondWord, addFrequency); + NextWord nw = new NextWord(secondWord, frequency); firstWord.ngrams.add(nw); - return addFrequency; + return frequency; } /** @@ -385,22 +396,44 @@ public class ExpandableDictionary extends Dictionary { return searchWord(childNode.children, word, depth + 1, childNode); } - @Override - public void getBigrams(final WordComposer codes, final CharSequence previousWord, - final WordCallback callback, int[] nextLettersFrequencies) { + // @VisibleForTesting + boolean reloadDictionaryIfRequired() { synchronized (mUpdatingLock) { // If we need to update, start off a background task if (mRequiresReload) startDictionaryLoadingTaskLocked(); // Currently updating contacts, don't return any results. - if (mUpdatingDictionary) return; + return mUpdatingDictionary; } + } + private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) { Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length()); if (prevWord != null && prevWord.ngrams != null) { reverseLookUp(prevWord.ngrams, callback); } } + @Override + public void getBigrams(final WordComposer codes, final CharSequence previousWord, + final WordCallback callback, int[] nextLettersFrequencies) { + if (!reloadDictionaryIfRequired()) { + runReverseLookUp(previousWord, callback); + } + } + + /** + * Used only for testing purposes + * This function will wait for loading from database to be done + */ + void waitForDictionaryLoading() { + while (mUpdatingDictionary) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + } + /** * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words * through callback. @@ -413,15 +446,18 @@ public class ExpandableDictionary extends Dictionary { for (NextWord nextWord : terminalNodes) { node = nextWord.word; freq = nextWord.frequency; - sb.setLength(0); - do { - sb.insert(0, node.code); - node = node.parent; - } while(node != null); + // TODO Not the best way to limit suggestion threshold + if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) { + sb.setLength(0); + do { + sb.insert(0, node.code); + node = node.parent; + } while(node != null); - // TODO better way to feed char array? - callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId, - DataType.BIGRAM); + // TODO better way to feed char array? + callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId, + DataType.BIGRAM); + } } } @@ -460,18 +496,11 @@ public class ExpandableDictionary extends Dictionary { @Override protected Void doInBackground(Void... v) { loadDictionaryAsync(); - return null; - } - - @Override - protected void onPostExecute(Void result) { - // TODO Auto-generated method stub synchronized (mUpdatingLock) { mUpdatingDictionary = false; } - super.onPostExecute(result); + return null; } - } static char toLowerCase(char c) { diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java index 917f89916..3ee9fe8eb 100644 --- a/java/src/com/android/inputmethod/latin/LatinIME.java +++ b/java/src/com/android/inputmethod/latin/LatinIME.java @@ -163,8 +163,7 @@ public class LatinIME extends InputMethodService KeyboardSwitcher mKeyboardSwitcher; private UserDictionary mUserDictionary; - // User Bigram is disabled for now - //private UserBigramDictionary mUserBigramDictionary; + private UserBigramDictionary mUserBigramDictionary; private ContactsDictionary mContactsDictionary; private AutoDictionary mAutoDictionary; @@ -454,15 +453,12 @@ public class LatinIME extends InputMethodService mAutoDictionary.close(); } mAutoDictionary = new AutoDictionary(this, this, mInputLocale, Suggest.DIC_AUTO); - // User Bigram is disabled for now - /* if (mUserBigramDictionary != null) { mUserBigramDictionary.close(); } mUserBigramDictionary = new UserBigramDictionary(this, this, mInputLocale, - Suggest.DIC_USERBIGRAM); + Suggest.DIC_USER); mSuggest.setUserBigramDictionary(mUserBigramDictionary); - */ mSuggest.setUserDictionary(mUserDictionary); mSuggest.setContactsDictionary(mContactsDictionary); mSuggest.setAutoDictionary(mAutoDictionary); @@ -698,8 +694,7 @@ public class LatinIME extends InputMethodService mKeyboardSwitcher.getInputView().closing(); } if (mAutoDictionary != null) mAutoDictionary.flushPendingWrites(); - // User Bigram is disabled for now - //if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites(); + if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites(); } @Override @@ -2007,15 +2002,14 @@ public class LatinIME extends InputMethodService && !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) { mAutoDictionary.addWord(suggestion.toString(), frequencyDelta); } - // User Bigram is disabled for now - /* + if (mUserBigramDictionary != null) { - CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection()); + CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection(), + mSentenceSeparators); if (!TextUtils.isEmpty(prevWord)) { - mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString(), 1); + mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString()); } } - */ } } diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java index cfb691021..a96737f5c 100755 --- a/java/src/com/android/inputmethod/latin/Suggest.java +++ b/java/src/com/android/inputmethod/latin/Suggest.java @@ -78,12 +78,13 @@ public class Suggest implements Dictionary.WordCallback { private Dictionary mUserBigramDictionary; private int mPrefMaxSuggestions = 12; - private int mPrefMaxBigrams = 255; + + private static final int PREF_MAX_BIGRAMS = 60; private boolean mAutoTextEnabled; private int[] mPriorities = new int[mPrefMaxSuggestions]; - private int[] mBigramPriorities = new int[mPrefMaxBigrams]; + private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS]; // Handle predictive correction for only the first 1280 characters for performance reasons // If we support scripts that need latin characters beyond that, we should probably use some @@ -92,7 +93,7 @@ public class Suggest implements Dictionary.WordCallback { // latin characters. private int[] mNextLettersFrequencies = new int[1280]; private ArrayList mSuggestions = new ArrayList(); - private ArrayList mBigramSuggestions = new ArrayList(); + ArrayList mBigramSuggestions = new ArrayList(); private ArrayList mStringPool = new ArrayList(); private boolean mHaveCorrection; private CharSequence mOriginalWord; @@ -173,7 +174,7 @@ public class Suggest implements Dictionary.WordCallback { } mPrefMaxSuggestions = maxSuggestions; mPriorities = new int[mPrefMaxSuggestions]; - mBigramPriorities = new int[mPrefMaxBigrams]; + mBigramPriorities = new int[PREF_MAX_BIGRAMS]; collectGarbage(mSuggestions, mPrefMaxSuggestions); while (mStringPool.size() < mPrefMaxSuggestions) { StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); @@ -242,7 +243,7 @@ public class Suggest implements Dictionary.WordCallback { || mCorrectionMode == CORRECTION_BASIC)) { // At first character typed, search only the bigrams Arrays.fill(mBigramPriorities, 0); - collectGarbage(mBigramSuggestions, mPrefMaxBigrams); + collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS); if (!TextUtils.isEmpty(prevWordForBigram)) { CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); @@ -401,7 +402,7 @@ public class Suggest implements Dictionary.WordCallback { if(dataType == Dictionary.DataType.BIGRAM) { suggestions = mBigramSuggestions; priorities = mBigramPriorities; - prefMaxSuggestions = mPrefMaxBigrams; + prefMaxSuggestions = PREF_MAX_BIGRAMS; } else { suggestions = mSuggestions; priorities = mPriorities; @@ -443,7 +444,6 @@ public class Suggest implements Dictionary.WordCallback { pos++; } } - if (pos >= prefMaxSuggestions) { return true; } diff --git a/java/src/com/android/inputmethod/latin/UserBigramDictionary.java b/java/src/com/android/inputmethod/latin/UserBigramDictionary.java new file mode 100644 index 000000000..c3eab94f1 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/UserBigramDictionary.java @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; + +import android.content.ContentValues; +import android.content.Context; +import android.database.Cursor; +import android.database.sqlite.SQLiteDatabase; +import android.database.sqlite.SQLiteOpenHelper; +import android.database.sqlite.SQLiteQueryBuilder; +import android.os.AsyncTask; +import android.provider.BaseColumns; +import android.util.Log; + +/** + * Stores all the pairs user types in databases. Prune the database if the size + * gets too big. Unlike AutoDictionary, it even stores the pairs that are already + * in the dictionary. + */ +public class UserBigramDictionary extends ExpandableDictionary { + private static final String TAG = "UserBigramDictionary"; + + /** Any pair being typed or picked */ + private static final int FREQUENCY_FOR_TYPED = 2; + + /** Maximum frequency for all pairs */ + private static final int FREQUENCY_MAX = 127; + + /** + * If this pair is typed 6 times, it would be suggested. + * Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM + */ + protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED; + + /** Maximum number of pairs. Pruning will start when databases goes above this number. */ + private static int sMaxUserBigrams = 10000; + + /** + * When it hits maximum bigram pair, it will delete until you are left with + * only (sMaxUserBigrams - sDeleteUserBigrams) pairs. + * Do not keep this number small to avoid deleting too often. + */ + private static int sDeleteUserBigrams = 1000; + + /** + * Database version should increase if the database structure changes + */ + private static final int DATABASE_VERSION = 1; + + private static final String DATABASE_NAME = "userbigram_dict.db"; + + /** Name of the words table in the database */ + private static final String MAIN_TABLE_NAME = "main"; + // TODO: Consume less space by using a unique id for locale instead of the whole + // 2-5 character string. (Same TODO from AutoDictionary) + private static final String MAIN_COLUMN_ID = BaseColumns._ID; + private static final String MAIN_COLUMN_WORD1 = "word1"; + private static final String MAIN_COLUMN_WORD2 = "word2"; + private static final String MAIN_COLUMN_LOCALE = "locale"; + + /** Name of the frequency table in the database */ + private static final String FREQ_TABLE_NAME = "frequency"; + private static final String FREQ_COLUMN_ID = BaseColumns._ID; + private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; + private static final String FREQ_COLUMN_FREQUENCY = "freq"; + + private final LatinIME mIme; + + /** Locale for which this auto dictionary is storing words */ + private String mLocale; + + private HashSet mPendingWrites = new HashSet(); + private final Object mPendingWritesLock = new Object(); + private static volatile boolean sUpdatingDB = false; + + private final static HashMap sDictProjectionMap; + + static { + sDictProjectionMap = new HashMap(); + sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); + sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); + sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); + sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); + + sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); + sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); + sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); + } + + private static DatabaseHelper sOpenHelper = null; + + private static class Bigram { + String word1; + String word2; + int frequency; + + Bigram(String word1, String word2, int frequency) { + this.word1 = word1; + this.word2 = word2; + this.frequency = frequency; + } + + @Override + public boolean equals(Object bigram) { + Bigram bigram2 = (Bigram) bigram; + return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2)); + } + + @Override + public int hashCode() { + return (word1 + " " + word2).hashCode(); + } + } + + public void setDatabaseMax(int maxUserBigram) { + sMaxUserBigrams = maxUserBigram; + } + + public void setDatabaseDelete(int deleteUserBigram) { + sDeleteUserBigrams = deleteUserBigram; + } + + public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { + super(context, dicTypeId); + mIme = ime; + mLocale = locale; + if (sOpenHelper == null) { + sOpenHelper = new DatabaseHelper(getContext()); + } + if (mLocale != null && mLocale.length() > 1) { + loadDictionary(); + } + } + + @Override + public void close() { + flushPendingWrites(); + // Don't close the database as locale changes will require it to be reopened anyway + // Also, the database is written to somewhat frequently, so it needs to be kept alive + // throughout the life of the process. + // mOpenHelper.close(); + super.close(); + } + + /** + * Pair will be added to the userbigram database. + */ + public int addBigrams(String word1, String word2) { + // remove caps + if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) { + word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); + } + + int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); + if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; + synchronized (mPendingWritesLock) { + if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { + mPendingWrites.add(new Bigram(word1, word2, freq)); + } else { + Bigram bi = new Bigram(word1, word2, freq); + mPendingWrites.remove(bi); + mPendingWrites.add(bi); + } + } + + return freq; + } + + /** + * Schedules a background thread to write any pending words to the database. + */ + public void flushPendingWrites() { + synchronized (mPendingWritesLock) { + // Nothing pending? Return + if (mPendingWrites.isEmpty()) return; + // Create a background thread to write the pending entries + new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute(); + // Create a new map for writing new entries into while the old one is written to db + mPendingWrites = new HashSet(); + } + } + + /** Used for testing purpose **/ + void waitUntilUpdateDBDone() { + synchronized (mPendingWritesLock) { + while (sUpdatingDB) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + return; + } + } + + @Override + public void loadDictionaryAsync() { + // Load the words that correspond to the current input locale + Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); + try { + if (cursor.moveToFirst()) { + int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); + int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); + int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); + while (!cursor.isAfterLast()) { + String word1 = cursor.getString(word1Index); + String word2 = cursor.getString(word2Index); + int frequency = cursor.getInt(frequencyIndex); + // Safeguard against adding really long words. Stack may overflow due + // to recursive lookup + if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) { + super.setBigram(word1, word2, frequency); + } + cursor.moveToNext(); + } + } + } finally { + cursor.close(); + } + } + + /** + * Query the database + */ + private Cursor query(String selection, String[] selectionArgs) { + SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); + + // main INNER JOIN frequency ON (main._id=freq.pair_id) + qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" + + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." + + FREQ_COLUMN_PAIR_ID +")"); + + qb.setProjectionMap(sDictProjectionMap); + + // Get the database and run the query + SQLiteDatabase db = sOpenHelper.getReadableDatabase(); + Cursor c = qb.query(db, + new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, + selection, selectionArgs, null, null, null); + return c; + } + + /** + * This class helps open, create, and upgrade the database file. + */ + private static class DatabaseHelper extends SQLiteOpenHelper { + + DatabaseHelper(Context context) { + super(context, DATABASE_NAME, null, DATABASE_VERSION); + } + + @Override + public void onCreate(SQLiteDatabase db) { + db.execSQL("PRAGMA foreign_keys = ON;"); + db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" + + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," + + MAIN_COLUMN_WORD1 + " TEXT," + + MAIN_COLUMN_WORD2 + " TEXT," + + MAIN_COLUMN_LOCALE + " TEXT" + + ");"); + db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" + + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," + + FREQ_COLUMN_PAIR_ID + " INTEGER," + + FREQ_COLUMN_FREQUENCY + " INTEGER," + + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME + + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" + + ");"); + } + + @Override + public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { + Log.w(TAG, "Upgrading database from version " + oldVersion + " to " + + newVersion + ", which will destroy all old data"); + db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); + db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); + onCreate(db); + } + } + + /** + * Async task to write pending words to the database so that it stays in sync with + * the in-memory trie. + */ + private static class UpdateDbTask extends AsyncTask { + private final HashSet mMap; + private final DatabaseHelper mDbHelper; + private final String mLocale; + + public UpdateDbTask(Context context, DatabaseHelper openHelper, + HashSet pendingWrites, String locale) { + mMap = pendingWrites; + mLocale = locale; + mDbHelper = openHelper; + } + + /** Prune any old data if the database is getting too big. */ + private void checkPruneData(SQLiteDatabase db) { + db.execSQL("PRAGMA foreign_keys = ON;"); + Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, + null, null, null, null, null); + try { + int totalRowCount = c.getCount(); + // prune out old data if we have too much data + if (totalRowCount > sMaxUserBigrams) { + int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams; + int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); + c.moveToFirst(); + int count = 0; + while (count < numDeleteRows && !c.isAfterLast()) { + String pairId = c.getString(pairIdColumnId); + // Deleting from MAIN table will delete the frequencies + // due to FOREIGN KEY .. ON DELETE CASCADE + db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", + new String[] { pairId }); + c.moveToNext(); + count++; + } + } + } finally { + c.close(); + } + } + + @Override + protected void onPreExecute() { + sUpdatingDB = true; + } + + @Override + protected Void doInBackground(Void... v) { + SQLiteDatabase db = mDbHelper.getWritableDatabase(); + db.execSQL("PRAGMA foreign_keys = ON;"); + // Write all the entries to the db + Iterator iterator = mMap.iterator(); + while (iterator.hasNext()) { + Bigram bi = iterator.next(); + + // find pair id + Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, + MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " + + MAIN_COLUMN_LOCALE + "=?", + new String[] { bi.word1, bi.word2, mLocale }, null, null, null); + + int pairId; + if (c.moveToFirst()) { + // existing pair + pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); + db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", + new String[] { Integer.toString(pairId) }); + } else { + // new pair + Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, + getContentValues(bi.word1, bi.word2, mLocale)); + pairId = pairIdLong.intValue(); + } + c.close(); + + // insert new frequency + long s = db.insert(FREQ_TABLE_NAME, null, + getFrequencyContentValues(pairId, bi.frequency)); + } + checkPruneData(db); + sUpdatingDB = false; + + return null; + } + + private ContentValues getContentValues(String word1, String word2, String locale) { + ContentValues values = new ContentValues(3); + values.put(MAIN_COLUMN_WORD1, word1); + values.put(MAIN_COLUMN_WORD2, word2); + values.put(MAIN_COLUMN_LOCALE, locale); + return values; + } + + private ContentValues getFrequencyContentValues(int pairId, int frequency) { + ContentValues values = new ContentValues(2); + values.put(FREQ_COLUMN_PAIR_ID, pairId); + values.put(FREQ_COLUMN_FREQUENCY, frequency); + return values; + } + } + +} diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestHelper.java b/tests/src/com/android/inputmethod/latin/SuggestHelper.java similarity index 68% rename from tests/src/com/android/inputmethod/latin/tests/SuggestHelper.java rename to tests/src/com/android/inputmethod/latin/SuggestHelper.java index 107f04c7c..759bfa18a 100644 --- a/tests/src/com/android/inputmethod/latin/tests/SuggestHelper.java +++ b/tests/src/com/android/inputmethod/latin/SuggestHelper.java @@ -14,13 +14,13 @@ * the License. */ -package com.android.inputmethod.latin.tests; +package com.android.inputmethod.latin; import android.content.Context; -import android.test.AndroidTestCase; import android.text.TextUtils; import android.util.Log; import com.android.inputmethod.latin.Suggest; +import com.android.inputmethod.latin.UserBigramDictionary; import com.android.inputmethod.latin.WordComposer; import java.io.IOException; @@ -29,28 +29,32 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.Channels; import java.util.List; +import java.util.Locale; +import java.util.StringTokenizer; public class SuggestHelper { private Suggest mSuggest; + private UserBigramDictionary mUserBigram; private final String TAG; + /** Uses main dictionary only **/ public SuggestHelper(String tag, Context context, int[] resId) { TAG = tag; - InputStream[] res = null; + InputStream[] is = null; try { // merging separated dictionary into one if dictionary is separated int total = 0; - res = new InputStream[resId.length]; + is = new InputStream[resId.length]; for (int i = 0; i < resId.length; i++) { - res[i] = context.getResources().openRawResource(resId[i]); - total += res[i].available(); + is[i] = context.getResources().openRawResource(resId[i]); + total += is[i].available(); } ByteBuffer byteBuffer = ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder()); int got = 0; for (int i = 0; i < resId.length; i++) { - got += Channels.newChannel(res[i]).read(byteBuffer); + got += Channels.newChannel(is[i]).read(byteBuffer); } if (got != total) { Log.w(TAG, "Read " + got + " bytes, expected " + total); @@ -62,8 +66,10 @@ public class SuggestHelper { Log.w(TAG, "No available memory for binary dictionary"); } finally { try { - for (int i = 0;i < res.length; i++) { - res[i].close(); + if (is != null) { + for (int i = 0; i < is.length; i++) { + is[i].close(); + } } } catch (IOException e) { Log.w(TAG, "Failed to close input stream"); @@ -73,6 +79,27 @@ public class SuggestHelper { mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM); } + /** Uses both main dictionary and user-bigram dictionary **/ + public SuggestHelper(String tag, Context context, int[] resId, int userBigramMax, + int userBigramDelete) { + this(tag, context, resId); + mUserBigram = new UserBigramDictionary(context, null, Locale.US.toString(), + Suggest.DIC_USER); + mUserBigram.setDatabaseMax(userBigramMax); + mUserBigram.setDatabaseDelete(userBigramDelete); + mSuggest.setUserBigramDictionary(mUserBigram); + } + + void changeUserBigramLocale(Context context, Locale locale) { + if (mUserBigram != null) { + flushUserBigrams(); + mUserBigram.close(); + mUserBigram = new UserBigramDictionary(context, null, locale.toString(), + Suggest.DIC_USER); + mSuggest.setUserBigramDictionary(mUserBigram); + } + } + private WordComposer createWordComposer(CharSequence s) { WordComposer word = new WordComposer(); for (int i = 0; i < s.length(); i++) { @@ -125,8 +152,8 @@ public class SuggestHelper { } private void getBigramSuggestions(CharSequence previous, CharSequence typed) { - if(!TextUtils.isEmpty(previous) && (typed.length() > 1)) { - WordComposer firstChar = createWordComposer(typed.charAt(0) + ""); + if (!TextUtils.isEmpty(previous) && (typed.length() > 1)) { + WordComposer firstChar = createWordComposer(Character.toString(typed.charAt(0))); mSuggest.getSuggestions(null, firstChar, false, previous); } } @@ -162,6 +189,54 @@ public class SuggestHelper { return mSuggest.isValidWord(typed); } + boolean isUserBigramSuggestion(CharSequence previous, char typed, + CharSequence expected) { + WordComposer word = createWordComposer(Character.toString(typed)); + + if (mUserBigram == null) return false; + + flushUserBigrams(); + if (!TextUtils.isEmpty(previous) && !TextUtils.isEmpty(Character.toString(typed))) { + WordComposer firstChar = createWordComposer(Character.toString(typed)); + mSuggest.getSuggestions(null, firstChar, false, previous); + boolean reloading = mUserBigram.reloadDictionaryIfRequired(); + if (reloading) mUserBigram.waitForDictionaryLoading(); + mUserBigram.getBigrams(firstChar, previous, mSuggest, null); + } + + List suggestions = mSuggest.mBigramSuggestions; + for (int i = 0; i < suggestions.size(); i++) { + if (TextUtils.equals(suggestions.get(i), expected)) return true; + } + + return false; + } + + void addToUserBigram(String sentence) { + StringTokenizer st = new StringTokenizer(sentence); + String previous = null; + while (st.hasMoreTokens()) { + String current = st.nextToken(); + if (previous != null) { + addToUserBigram(new String[] {previous, current}); + } + previous = current; + } + } + + void addToUserBigram(String[] pair) { + if (mUserBigram != null && pair.length == 2) { + mUserBigram.addBigrams(pair[0], pair[1]); + } + } + + void flushUserBigrams() { + if (mUserBigram != null) { + mUserBigram.flushPendingWrites(); + mUserBigram.waitUntilUpdateDBDone(); + } + } + final int[][] adjacents = { {'a','s','w','q',-1}, {'b','h','v','n','g','j',-1}, diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestPerformanceTests.java b/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java similarity index 97% rename from tests/src/com/android/inputmethod/latin/tests/SuggestPerformanceTests.java rename to tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java index 473c440f9..7eb66d502 100644 --- a/tests/src/com/android/inputmethod/latin/tests/SuggestPerformanceTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestPerformanceTests.java @@ -14,16 +14,15 @@ * the License. */ -package com.android.inputmethod.latin.tests; +package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.util.Log; - +import com.android.inputmethod.latin.tests.R; import java.io.InputStreamReader; import java.io.InputStream; import java.io.BufferedReader; import java.util.StringTokenizer; -import java.util.regex.Pattern; public class SuggestPerformanceTests extends AndroidTestCase { private static final String TAG = "SuggestPerformanceTests"; @@ -122,6 +121,6 @@ public class SuggestPerformanceTests extends AndroidTestCase { * Check the log for detail */ public void testSuggestPerformance() { - assertTrue(runText(false) < runText(true)); + assertTrue(runText(false) <= runText(true)); } } diff --git a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java b/tests/src/com/android/inputmethod/latin/SuggestTests.java similarity index 98% rename from tests/src/com/android/inputmethod/latin/tests/SuggestTests.java rename to tests/src/com/android/inputmethod/latin/SuggestTests.java index a42422b91..8463ed316 100644 --- a/tests/src/com/android/inputmethod/latin/tests/SuggestTests.java +++ b/tests/src/com/android/inputmethod/latin/SuggestTests.java @@ -14,10 +14,10 @@ * the License. */ -package com.android.inputmethod.latin.tests; +package com.android.inputmethod.latin; import android.test.AndroidTestCase; -import android.util.Log; +import com.android.inputmethod.latin.tests.R; public class SuggestTests extends AndroidTestCase { private static final String TAG = "SuggestTests"; diff --git a/tests/src/com/android/inputmethod/latin/UserBigramTests.java b/tests/src/com/android/inputmethod/latin/UserBigramTests.java new file mode 100644 index 000000000..cbf7bd8e1 --- /dev/null +++ b/tests/src/com/android/inputmethod/latin/UserBigramTests.java @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package com.android.inputmethod.latin; + +import android.test.AndroidTestCase; +import com.android.inputmethod.latin.tests.R; +import java.util.Locale; + +public class UserBigramTests extends AndroidTestCase { + private static final String TAG = "UserBigramTests"; + + private static final int SUGGESTION_STARTS = 6; + private static final int MAX_DATA = 20; + private static final int DELETE_DATA = 10; + + private SuggestHelper sh; + + @Override + protected void setUp() { + int[] resId = new int[] { R.raw.test }; + sh = new SuggestHelper(TAG, getTestContext(), resId, MAX_DATA, DELETE_DATA); + } + + /************************** Tests ************************/ + + /** + * Test suggestion started at right time + */ + public void testUserBigram() { + for (int i = 0; i < SUGGESTION_STARTS; i++) sh.addToUserBigram(pair1); + for (int i = 0; i < (SUGGESTION_STARTS - 1); i++) sh.addToUserBigram(pair2); + + assertTrue(sh.isUserBigramSuggestion("user", 'b', "bigram")); + assertFalse(sh.isUserBigramSuggestion("android", 'p', "platform")); + } + + /** + * Test loading correct (locale) bigrams + */ + public void testOpenAndClose() { + for (int i = 0; i < SUGGESTION_STARTS; i++) sh.addToUserBigram(pair1); + assertTrue(sh.isUserBigramSuggestion("user", 'b', "bigram")); + + // change to fr_FR + sh.changeUserBigramLocale(getTestContext(), Locale.FRANCE); + for (int i = 0; i < SUGGESTION_STARTS; i++) sh.addToUserBigram(pair3); + assertTrue(sh.isUserBigramSuggestion("locale", 'f', "france")); + assertFalse(sh.isUserBigramSuggestion("user", 'b', "bigram")); + + // change back to en_US + sh.changeUserBigramLocale(getTestContext(), Locale.US); + assertFalse(sh.isUserBigramSuggestion("locale", 'f', "france")); + assertTrue(sh.isUserBigramSuggestion("user", 'b', "bigram")); + } + + /** + * Test data gets pruned when it is over maximum + */ + public void testPruningData() { + for (int i = 0; i < SUGGESTION_STARTS; i++) sh.addToUserBigram(sentence0); + sh.flushUserBigrams(); + assertTrue(sh.isUserBigramSuggestion("Hello", 'w', "world")); + + sh.addToUserBigram(sentence1); + sh.addToUserBigram(sentence2); + assertTrue(sh.isUserBigramSuggestion("Hello", 'w', "world")); + + // pruning should happen + sh.addToUserBigram(sentence3); + sh.addToUserBigram(sentence4); + + // trying to reopen database to check pruning happened in database + sh.changeUserBigramLocale(getTestContext(), Locale.US); + assertFalse(sh.isUserBigramSuggestion("Hello", 'w', "world")); + } + + final String[] pair1 = new String[] {"user", "bigram"}; + final String[] pair2 = new String[] {"android","platform"}; + final String[] pair3 = new String[] {"locale", "france"}; + final String sentence0 = "Hello world"; + final String sentence1 = "This is a test for user input based bigram"; + final String sentence2 = "It learns phrases that contain both dictionary and nondictionary " + + "words"; + final String sentence3 = "This should give better suggestions than the previous version"; + final String sentence4 = "Android stock keyboard is improving"; +}