-UserBigram

-UnitTest for UserBigram
-Changes for number of bigrams to load

Change-Id: I2c6fbe6194d34112ccc52c7e199461d2350e8516
This commit is contained in:
Jae Yong Sung 2010-08-03 18:28:38 -07:00
parent 6dea425480
commit ac093396ba
11 changed files with 676 additions and 75 deletions

View file

@ -83,14 +83,14 @@ public class AutoDictionary extends ExpandableDictionary {
sDictProjectionMap.put(COLUMN_LOCALE, COLUMN_LOCALE);
}
private static DatabaseHelper mOpenHelper = null;
private static DatabaseHelper sOpenHelper = null;
public AutoDictionary(Context context, LatinIME ime, String locale, int dicTypeId) {
super(context, dicTypeId);
mIme = ime;
mLocale = locale;
if (mOpenHelper == null) {
mOpenHelper = new DatabaseHelper(getContext());
if (sOpenHelper == null) {
sOpenHelper = new DatabaseHelper(getContext());
}
if (mLocale != null && mLocale.length() > 1) {
loadDictionary();
@ -169,7 +169,7 @@ public class AutoDictionary extends ExpandableDictionary {
// Nothing pending? Return
if (mPendingWrites.isEmpty()) return;
// Create a background thread to write the pending entries
new UpdateDbTask(getContext(), mOpenHelper, mPendingWrites, mLocale).execute();
new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute();
// Create a new map for writing new entries into while the old one is written to db
mPendingWrites = new HashMap<String, Integer>();
}
@ -209,7 +209,7 @@ public class AutoDictionary extends ExpandableDictionary {
qb.setProjectionMap(sDictProjectionMap);
// Get the database and run the query
SQLiteDatabase db = mOpenHelper.getReadableDatabase();
SQLiteDatabase db = sOpenHelper.getReadableDatabase();
Cursor c = qb.query(db, null, selection, selectionArgs, null, null,
DEFAULT_SORT_ORDER);
return c;

View file

@ -42,8 +42,8 @@ public class BinaryDictionary extends Dictionary {
private static final String TAG = "BinaryDictionary";
private static final int MAX_ALTERNATIVES = 16;
private static final int MAX_WORDS = 16;
private static final int MAX_BIGRAMS = 255; // TODO Probably don't need all 255
private static final int MAX_WORDS = 18;
private static final int MAX_BIGRAMS = 60;
private static final int TYPED_LETTER_MULTIPLIER = 2;
private static final boolean ENABLE_MISSED_CHARACTERS = true;
@ -140,8 +140,10 @@ public class BinaryDictionary extends Dictionary {
Log.w(TAG, "No available memory for binary dictionary");
} finally {
try {
for (int i = 0;i < is.length; i++) {
is[i].close();
if (is != null) {
for (int i = 0; i < is.length; i++) {
is[i].close();
}
}
} catch (IOException e) {
Log.w(TAG, "Failed to close input stream");

View file

@ -125,8 +125,8 @@ public class ContactsDictionary extends ExpandableDictionary {
super.addWord(word, FREQUENCY_FOR_CONTACTS);
if (!TextUtils.isEmpty(prevWord)) {
// TODO Do not add email address
super.addBigrams(prevWord, word,
FREQUENCY_FOR_CONTACTS_BIGRAM);
// Not so critical
super.setBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM);
}
prevWord = word;
}

View file

@ -20,8 +20,6 @@ import java.util.LinkedList;
import android.content.Context;
import android.os.AsyncTask;
import android.os.SystemClock;
import android.util.Log;
/**
* Base class for an in-memory dictionary that can grow dynamically and can
@ -325,12 +323,21 @@ public class ExpandableDictionary extends Dictionary {
}
}
/**
 * Stores {@code frequency} as the frequency of the (word1, word2) pair, replacing any
 * value the pair already had.
 * @return the frequency now recorded for the pair
 */
protected int setBigram(String word1, String word2, int frequency) {
    final boolean accumulate = false;
    return addOrSetBigram(word1, word2, frequency, accumulate);
}
/**
 * Adds {@code frequency} on top of whatever frequency the (word1, word2) pair already
 * has; a pair seen for the first time starts at {@code frequency}.
 * @return the frequency now recorded for the pair
 */
protected int addBigram(String word1, String word2, int frequency) {
    final boolean accumulate = true;
    return addOrSetBigram(word1, word2, frequency, accumulate);
}
/**
* Adds bigrams to the in-memory trie structure that is being used to retrieve any word
* @param addFrequency adding frequency of the pair
* @param frequency frequency for this bigrams
* @param addFrequency if true, it adds to current frequency
* @return returns the final frequency
*/
protected int addBigrams(String word1, String word2, int addFrequency) {
private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
Node firstWord = searchWord(mRoots, word1, 0, null);
Node secondWord = searchWord(mRoots, word2, 0, null);
LinkedList<NextWord> bigram = firstWord.ngrams;
@ -340,14 +347,18 @@ public class ExpandableDictionary extends Dictionary {
} else {
for (NextWord nw : bigram) {
if (nw.word == secondWord) {
nw.frequency += addFrequency;
if (addFrequency) {
nw.frequency += frequency;
} else {
nw.frequency = frequency;
}
return nw.frequency;
}
}
}
NextWord nw = new NextWord(secondWord, addFrequency);
NextWord nw = new NextWord(secondWord, frequency);
firstWord.ngrams.add(nw);
return addFrequency;
return frequency;
}
/**
@ -385,22 +396,44 @@ public class ExpandableDictionary extends Dictionary {
return searchWord(childNode.children, word, depth + 1, childNode);
}
@Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
final WordCallback callback, int[] nextLettersFrequencies) {
// @VisibleForTesting
boolean reloadDictionaryIfRequired() {
synchronized (mUpdatingLock) {
// If we need to update, start off a background task
if (mRequiresReload) startDictionaryLoadingTaskLocked();
// Currently updating contacts, don't return any results.
if (mUpdatingDictionary) return;
return mUpdatingDictionary;
}
}
/**
 * Finds the trie node for {@code previousWord} and, when it has bigrams attached, reports
 * the words that follow it through {@code callback}.
 */
private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) {
    final Node previousNode = searchNode(mRoots, previousWord, 0, previousWord.length());
    // Nothing to report when the word is unknown or has never been followed by anything.
    if (previousNode == null || previousNode.ngrams == null) {
        return;
    }
    reverseLookUp(previousNode.ngrams, callback);
}
/**
 * Reports the bigrams stored for {@code previousWord} through {@code callback}, unless
 * reloadDictionaryIfRequired() returns true, in which case nothing is reported.
 */
@Override
public void getBigrams(final WordComposer codes, final CharSequence previousWord,
        final WordCallback callback, int[] nextLettersFrequencies) {
    final boolean updating = reloadDictionaryIfRequired();
    if (updating) {
        return;
    }
    runReverseLookUp(previousWord, callback);
}
/**
 * Used only for testing purposes.
 * Blocks the calling thread, polling every 100 ms, until mUpdatingDictionary is false.
 * If the thread is interrupted while waiting, the interrupt status is restored and the
 * method returns early; the dictionary may then still be loading.
 */
void waitForDictionaryLoading() {
    while (mUpdatingDictionary) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // Swallowing the exception would make this wait impossible to cancel and
            // would hide the interruption from the caller, so re-assert it and stop.
            Thread.currentThread().interrupt();
            return;
        }
    }
}
/**
* reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
* through callback.
@ -413,15 +446,18 @@ public class ExpandableDictionary extends Dictionary {
for (NextWord nextWord : terminalNodes) {
node = nextWord.word;
freq = nextWord.frequency;
sb.setLength(0);
do {
sb.insert(0, node.code);
node = node.parent;
} while(node != null);
// TODO Not the best way to limit suggestion threshold
if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) {
sb.setLength(0);
do {
sb.insert(0, node.code);
node = node.parent;
} while(node != null);
// TODO better way to feed char array?
callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
DataType.BIGRAM);
// TODO better way to feed char array?
callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
DataType.BIGRAM);
}
}
}
@ -460,18 +496,11 @@ public class ExpandableDictionary extends Dictionary {
@Override
protected Void doInBackground(Void... v) {
loadDictionaryAsync();
return null;
}
@Override
protected void onPostExecute(Void result) {
// TODO Auto-generated method stub
synchronized (mUpdatingLock) {
mUpdatingDictionary = false;
}
super.onPostExecute(result);
return null;
}
}
static char toLowerCase(char c) {

View file

@ -163,8 +163,7 @@ public class LatinIME extends InputMethodService
KeyboardSwitcher mKeyboardSwitcher;
private UserDictionary mUserDictionary;
// User Bigram is disabled for now
//private UserBigramDictionary mUserBigramDictionary;
private UserBigramDictionary mUserBigramDictionary;
private ContactsDictionary mContactsDictionary;
private AutoDictionary mAutoDictionary;
@ -454,15 +453,12 @@ public class LatinIME extends InputMethodService
mAutoDictionary.close();
}
mAutoDictionary = new AutoDictionary(this, this, mInputLocale, Suggest.DIC_AUTO);
// User Bigram is disabled for now
/*
if (mUserBigramDictionary != null) {
mUserBigramDictionary.close();
}
mUserBigramDictionary = new UserBigramDictionary(this, this, mInputLocale,
Suggest.DIC_USERBIGRAM);
Suggest.DIC_USER);
mSuggest.setUserBigramDictionary(mUserBigramDictionary);
*/
mSuggest.setUserDictionary(mUserDictionary);
mSuggest.setContactsDictionary(mContactsDictionary);
mSuggest.setAutoDictionary(mAutoDictionary);
@ -698,8 +694,7 @@ public class LatinIME extends InputMethodService
mKeyboardSwitcher.getInputView().closing();
}
if (mAutoDictionary != null) mAutoDictionary.flushPendingWrites();
// User Bigram is disabled for now
//if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites();
if (mUserBigramDictionary != null) mUserBigramDictionary.flushPendingWrites();
}
@Override
@ -2007,15 +2002,14 @@ public class LatinIME extends InputMethodService
&& !mSuggest.isValidWord(suggestion.toString().toLowerCase()))) {
mAutoDictionary.addWord(suggestion.toString(), frequencyDelta);
}
// User Bigram is disabled for now
/*
if (mUserBigramDictionary != null) {
CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection());
CharSequence prevWord = EditingUtil.getPreviousWord(getCurrentInputConnection(),
mSentenceSeparators);
if (!TextUtils.isEmpty(prevWord)) {
mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString(), 1);
mUserBigramDictionary.addBigrams(prevWord.toString(), suggestion.toString());
}
}
*/
}
}

View file

@ -78,12 +78,13 @@ public class Suggest implements Dictionary.WordCallback {
private Dictionary mUserBigramDictionary;
private int mPrefMaxSuggestions = 12;
private int mPrefMaxBigrams = 255;
private static final int PREF_MAX_BIGRAMS = 60;
private boolean mAutoTextEnabled;
private int[] mPriorities = new int[mPrefMaxSuggestions];
private int[] mBigramPriorities = new int[mPrefMaxBigrams];
private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS];
// Handle predictive correction for only the first 1280 characters for performance reasons
// If we support scripts that need latin characters beyond that, we should probably use some
@ -92,7 +93,7 @@ public class Suggest implements Dictionary.WordCallback {
// latin characters.
private int[] mNextLettersFrequencies = new int[1280];
private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
private boolean mHaveCorrection;
private CharSequence mOriginalWord;
@ -173,7 +174,7 @@ public class Suggest implements Dictionary.WordCallback {
}
mPrefMaxSuggestions = maxSuggestions;
mPriorities = new int[mPrefMaxSuggestions];
mBigramPriorities = new int[mPrefMaxBigrams];
mBigramPriorities = new int[PREF_MAX_BIGRAMS];
collectGarbage(mSuggestions, mPrefMaxSuggestions);
while (mStringPool.size() < mPrefMaxSuggestions) {
StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
@ -242,7 +243,7 @@ public class Suggest implements Dictionary.WordCallback {
|| mCorrectionMode == CORRECTION_BASIC)) {
// At first character typed, search only the bigrams
Arrays.fill(mBigramPriorities, 0);
collectGarbage(mBigramSuggestions, mPrefMaxBigrams);
collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS);
if (!TextUtils.isEmpty(prevWordForBigram)) {
CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
@ -401,7 +402,7 @@ public class Suggest implements Dictionary.WordCallback {
if(dataType == Dictionary.DataType.BIGRAM) {
suggestions = mBigramSuggestions;
priorities = mBigramPriorities;
prefMaxSuggestions = mPrefMaxBigrams;
prefMaxSuggestions = PREF_MAX_BIGRAMS;
} else {
suggestions = mSuggestions;
priorities = mPriorities;
@ -443,7 +444,6 @@ public class Suggest implements Dictionary.WordCallback {
pos++;
}
}
if (pos >= prefMaxSuggestions) {
return true;
}

View file

@ -0,0 +1,402 @@
/*
* Copyright (C) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import android.content.ContentValues;
import android.content.Context;
import android.database.Cursor;
import android.database.sqlite.SQLiteDatabase;
import android.database.sqlite.SQLiteOpenHelper;
import android.database.sqlite.SQLiteQueryBuilder;
import android.os.AsyncTask;
import android.provider.BaseColumns;
import android.util.Log;
/**
* Stores every pair of words the user types in a database, and prunes the database when
* it gets too big. Unlike AutoDictionary, it also stores the pairs that are already
* in the dictionary.
*/
public class UserBigramDictionary extends ExpandableDictionary {
private static final String TAG = "UserBigramDictionary";
/** Frequency added to a pair each time it is typed or picked */
private static final int FREQUENCY_FOR_TYPED = 2;
/** Upper bound applied to a pair's frequency before it is queued for the database */
private static final int FREQUENCY_MAX = 127;
/**
* Minimum frequency a pair needs before it is suggested; a pair typed 6 times reaches it.
* Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM
*/
protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED;
/** Maximum number of pairs. Pruning starts when the database goes above this number. */
private static int sMaxUserBigrams = 10000;
/**
* When the database goes above the maximum number of pairs, rows are deleted until only
* (sMaxUserBigrams - sDeleteUserBigrams) pairs are left.
* Do not keep this number small to avoid deleting too often.
*/
private static int sDeleteUserBigrams = 1000;
/**
* Database version should increase if the database structure changes
*/
private static final int DATABASE_VERSION = 1;
/** File name of the database that holds the user bigrams */
private static final String DATABASE_NAME = "userbigram_dict.db";
/** Name of the words table in the database */
private static final String MAIN_TABLE_NAME = "main";
// TODO: Consume less space by using a unique id for locale instead of the whole
// 2-5 character string. (Same TODO from AutoDictionary)
private static final String MAIN_COLUMN_ID = BaseColumns._ID;
private static final String MAIN_COLUMN_WORD1 = "word1";
private static final String MAIN_COLUMN_WORD2 = "word2";
private static final String MAIN_COLUMN_LOCALE = "locale";
/** Name of the frequency table in the database */
private static final String FREQ_TABLE_NAME = "frequency";
private static final String FREQ_COLUMN_ID = BaseColumns._ID;
/** Column of the frequency table that refers to the _id of a row in the main table */
private static final String FREQ_COLUMN_PAIR_ID = "pair_id";
private static final String FREQ_COLUMN_FREQUENCY = "freq";
/** May be null; addBigrams() skips the auto-capitalization check in that case */
private final LatinIME mIme;
/** Locale for which this user bigram dictionary is storing pairs */
private String mLocale;
/** Pairs added since the last flush that still have to be written to the database */
private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
/** Guards mPendingWrites */
private final Object mPendingWritesLock = new Object();
/** True from UpdateDbTask.onPreExecute() until its doInBackground() has finished writing */
private static volatile boolean sUpdatingDB = false;
/** Projection map handed to the SQLiteQueryBuilder in query(); every column maps to itself */
private final static HashMap<String, String> sDictProjectionMap;
static {
sDictProjectionMap = new HashMap<String, String>();
sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID);
sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1);
sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2);
sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE);
sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID);
sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID);
sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY);
}
/** Database helper shared by all instances; created by the first constructor call */
private static DatabaseHelper sOpenHelper = null;
/**
 * A pair of words together with the frequency to persist for it.
 * Equality and hash code depend on the two words only, never on the frequency, so a
 * HashSet holds at most one entry per pair.
 */
private static class Bigram {
    String word1;
    String word2;
    int frequency;

    Bigram(String word1, String word2, int frequency) {
        this.word1 = word1;
        this.word2 = word2;
        this.frequency = frequency;
    }

    /**
     * @return true when {@code bigram} is a Bigram with the same two words; false for
     *         null or for an object of any other type
     */
    @Override
    public boolean equals(Object bigram) {
        if (this == bigram) {
            return true;
        }
        // instanceof is also false for null, so neither case reaches the cast below.
        if (!(bigram instanceof Bigram)) {
            return false;
        }
        Bigram bigram2 = (Bigram) bigram;
        return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2));
    }

    /** Hash of the two words joined by a space; consistent with equals(). */
    @Override
    public int hashCode() {
        return (word1 + " " + word2).hashCode();
    }
}
/**
 * Replaces the number of pairs above which the database is pruned. The limit is held
 * in a static field, so it is shared by every instance of this class.
 */
public void setDatabaseMax(int maxUserBigram) {
    UserBigramDictionary.sMaxUserBigrams = maxUserBigram;
}
/**
 * Replaces the number of extra pairs removed whenever the database is pruned. The value
 * is held in a static field, so it is shared by every instance of this class.
 */
public void setDatabaseDelete(int deleteUserBigram) {
    UserBigramDictionary.sDeleteUserBigrams = deleteUserBigram;
}
public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) {
super(context, dicTypeId);
mIme = ime;
mLocale = locale;
if (sOpenHelper == null) {
sOpenHelper = new DatabaseHelper(getContext());
}
if (mLocale != null && mLocale.length() > 1) {
loadDictionary();
}
}
/**
* Schedules any pending pairs to be written to the database and then delegates to the
* superclass close(). The shared database helper is left open.
*/
@Override
public void close() {
flushPendingWrites();
// Don't close the database as locale changes will require it to be reopened anyway
// Also, the database is written to somewhat frequently, so it needs to be kept alive
// throughout the life of the process.
// sOpenHelper.close();
super.close();
}
/**
 * Records that {@code word2} was typed after {@code word1}: bumps the pair in the
 * in-memory trie by FREQUENCY_FOR_TYPED and queues it for the next database flush.
 * @return the pair's new frequency, capped at FREQUENCY_MAX
 */
public int addBigrams(String word1, String word2) {
    // An auto-capitalized word is stored with its first letter lowered again.
    final boolean autoCapitalized = mIme != null && mIme.getCurrentWord().isAutoCapitalized();
    if (autoCapitalized) {
        word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1);
    }
    final int freq = Math.min(super.addBigram(word1, word2, FREQUENCY_FOR_TYPED),
            FREQUENCY_MAX);
    final Bigram pending = new Bigram(word1, word2, freq);
    synchronized (mPendingWritesLock) {
        // HashSet.add() keeps an existing equal element, so an entry that may already be
        // queued has to be removed first for the new frequency to take its place.
        final boolean mayAlreadyBeQueued =
                freq != FREQUENCY_FOR_TYPED && !mPendingWrites.isEmpty();
        if (mayAlreadyBeQueued) {
            mPendingWrites.remove(pending);
        }
        mPendingWrites.add(pending);
    }
    return freq;
}
/**
 * Hands the queued pairs to an UpdateDbTask, which writes them to the database in the
 * background, and starts a fresh queue. Does nothing when the queue is empty.
 */
public void flushPendingWrites() {
    synchronized (mPendingWritesLock) {
        if (!mPendingWrites.isEmpty()) {
            final HashSet<Bigram> batch = mPendingWrites;
            new UpdateDbTask(getContext(), sOpenHelper, batch, mLocale).execute();
            // The task now owns the old set; new pairs go into a new one meanwhile.
            mPendingWrites = new HashSet<Bigram>();
        }
    }
}
/**
 * Used for testing purposes only.
 * Blocks the calling thread, polling every 100 ms, until sUpdatingDB is false. If the
 * thread is interrupted while waiting, the interrupt status is restored and the method
 * returns early; the database update may then still be running.
 */
void waitUntilUpdateDBDone() {
    // NOTE(review): the lock is held for the whole wait, so addBigrams() and
    // flushPendingWrites() on other threads block until the update is done - confirm
    // this is intended.
    synchronized (mPendingWritesLock) {
        while (sUpdatingDB) {
            try {
                Thread.sleep(100);
            } catch (InterruptedException e) {
                // Swallowing the exception would make this wait impossible to cancel
                // and would hide the interruption from the caller.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}
/**
 * Reads every pair stored for the current locale from the database and sets its
 * frequency in the in-memory trie.
 */
@Override
public void loadDictionaryAsync() {
    final Cursor rows = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale });
    try {
        if (!rows.moveToFirst()) {
            return;
        }
        final int word1Column = rows.getColumnIndex(MAIN_COLUMN_WORD1);
        final int word2Column = rows.getColumnIndex(MAIN_COLUMN_WORD2);
        final int frequencyColumn = rows.getColumnIndex(FREQ_COLUMN_FREQUENCY);
        do {
            final String first = rows.getString(word1Column);
            final String second = rows.getString(word2Column);
            final int storedFrequency = rows.getInt(frequencyColumn);
            // Safeguard against adding really long words. Stack may overflow due
            // to recursive lookup
            final boolean shortEnough =
                    first.length() < MAX_WORD_LENGTH && second.length() < MAX_WORD_LENGTH;
            if (shortEnough) {
                super.setBigram(first, second, storedFrequency);
            }
        } while (rows.moveToNext());
    } finally {
        rows.close();
    }
}
/**
 * Runs a query over the main table joined with the frequency table.
 * @param selection WHERE clause, without the WHERE keyword
 * @param selectionArgs values bound to the ?s in {@code selection}
 * @return a cursor over the word1, word2 and freq columns; the caller must close it
 */
private Cursor query(String selection, String[] selectionArgs) {
    // main INNER JOIN frequency ON (main._id=frequency.pair_id)
    final String joinedTables = MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON ("
            + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "."
            + FREQ_COLUMN_PAIR_ID +")";
    final String[] projection =
            { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY };

    final SQLiteQueryBuilder builder = new SQLiteQueryBuilder();
    builder.setTables(joinedTables);
    builder.setProjectionMap(sDictProjectionMap);
    return builder.query(sOpenHelper.getReadableDatabase(), projection,
            selection, selectionArgs, null, null, null);
}
/**
 * Opens, creates and upgrades the database file. The schema is a main table of
 * (word1, word2, locale) rows and a frequency table whose pair_id column references
 * the main table with ON DELETE CASCADE.
 */
private static class DatabaseHelper extends SQLiteOpenHelper {
    DatabaseHelper(Context context) {
        super(context, DATABASE_NAME, null, DATABASE_VERSION);
    }

    /** Creates the main and frequency tables. */
    @Override
    public void onCreate(SQLiteDatabase db) {
        final String createMainTable = "CREATE TABLE " + MAIN_TABLE_NAME + " ("
                + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY,"
                + MAIN_COLUMN_WORD1 + " TEXT,"
                + MAIN_COLUMN_WORD2 + " TEXT,"
                + MAIN_COLUMN_LOCALE + " TEXT"
                + ");";
        final String createFrequencyTable = "CREATE TABLE " + FREQ_TABLE_NAME + " ("
                + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY,"
                + FREQ_COLUMN_PAIR_ID + " INTEGER,"
                + FREQ_COLUMN_FREQUENCY + " INTEGER,"
                + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME
                + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE"
                + ");";
        // NOTE(review): SQLite documents this pragma as a no-op inside a transaction;
        // confirm whether onCreate() runs inside one on the targeted platform.
        db.execSQL("PRAGMA foreign_keys = ON;");
        db.execSQL(createMainTable);
        db.execSQL(createFrequencyTable);
    }

    /** Drops both tables, discarding all stored pairs, and recreates them empty. */
    @Override
    public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) {
        Log.w(TAG, "Upgrading database from version " + oldVersion + " to "
                + newVersion + ", which will destroy all old data");
        for (String table : new String[] { MAIN_TABLE_NAME, FREQ_TABLE_NAME }) {
            db.execSQL("DROP TABLE IF EXISTS " + table);
        }
        onCreate(db);
    }
}
/**
 * Async task to write pending pairs to the database so that it stays in sync with
 * the in-memory trie. sUpdatingDB is true from onPreExecute() until doInBackground()
 * has finished, whether it finished normally or with an exception.
 */
private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
    private final HashSet<Bigram> mMap;
    private final DatabaseHelper mDbHelper;
    private final String mLocale;

    /**
     * @param context not used by the task
     * @param openHelper helper that provides the writable database
     * @param pendingWrites pairs to write; the task iterates this set on a background thread
     * @param locale locale stored with every new pair
     */
    public UpdateDbTask(Context context, DatabaseHelper openHelper,
            HashSet<Bigram> pendingWrites, String locale) {
        mMap = pendingWrites;
        mLocale = locale;
        mDbHelper = openHelper;
    }

    /** Prune any old data if the database is getting too big. */
    private void checkPruneData(SQLiteDatabase db) {
        db.execSQL("PRAGMA foreign_keys = ON;");
        // NOTE(review): there is no ORDER BY, so which rows count as "old" depends on
        // the order in which SQLite returns them - confirm this is the intended order.
        Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID },
                null, null, null, null, null);
        try {
            int totalRowCount = c.getCount();
            // prune out old data if we have too much data
            if (totalRowCount > sMaxUserBigrams) {
                int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams;
                int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID);
                c.moveToFirst();
                int count = 0;
                while (count < numDeleteRows && !c.isAfterLast()) {
                    String pairId = c.getString(pairIdColumnId);
                    // Deleting from MAIN table will delete the frequencies
                    // due to FOREIGN KEY .. ON DELETE CASCADE
                    db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?",
                            new String[] { pairId });
                    c.moveToNext();
                    count++;
                }
            }
        } finally {
            c.close();
        }
    }

    @Override
    protected void onPreExecute() {
        sUpdatingDB = true;
    }

    /** Writes every pending pair to the database and then prunes it if it grew too big. */
    @Override
    protected Void doInBackground(Void... v) {
        try {
            SQLiteDatabase db = mDbHelper.getWritableDatabase();
            db.execSQL("PRAGMA foreign_keys = ON;");
            // Write all the entries to the db
            for (Bigram bi : mMap) {
                writeBigram(db, bi);
            }
            checkPruneData(db);
        } finally {
            // Reset the flag even when a write fails; otherwise waitUntilUpdateDBDone()
            // would keep polling forever.
            sUpdatingDB = false;
        }
        return null;
    }

    /** Stores one pair: reuses or creates its main row, then replaces its frequency row. */
    private void writeBigram(SQLiteDatabase db, Bigram bi) {
        final int pairId;
        // find pair id
        Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
                MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
                + MAIN_COLUMN_LOCALE + "=?",
                new String[] { bi.word1, bi.word2, mLocale }, null, null, null);
        try {
            if (c.moveToFirst()) {
                // existing pair: drop its old frequency row
                pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
                db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
                        new String[] { Integer.toString(pairId) });
            } else {
                // new pair
                long rowId = db.insert(MAIN_TABLE_NAME, null,
                        getContentValues(bi.word1, bi.word2, mLocale));
                if (rowId < 0) {
                    // insert() reports failure as -1; do not record a frequency for a
                    // pair that was never stored. The words are deliberately not logged.
                    Log.w(TAG, "Failed to insert a bigram pair; skipping its frequency");
                    return;
                }
                pairId = (int) rowId;
            }
        } finally {
            c.close();
        }
        // insert new frequency
        db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency));
    }

    private ContentValues getContentValues(String word1, String word2, String locale) {
        ContentValues values = new ContentValues(3);
        values.put(MAIN_COLUMN_WORD1, word1);
        values.put(MAIN_COLUMN_WORD2, word2);
        values.put(MAIN_COLUMN_LOCALE, locale);
        return values;
    }

    private ContentValues getFrequencyContentValues(int pairId, int frequency) {
        ContentValues values = new ContentValues(2);
        values.put(FREQ_COLUMN_PAIR_ID, pairId);
        values.put(FREQ_COLUMN_FREQUENCY, frequency);
        return values;
    }
}
}

View file

@ -14,13 +14,13 @@
* the License.
*/
package com.android.inputmethod.latin.tests;
package com.android.inputmethod.latin;
import android.content.Context;
import android.test.AndroidTestCase;
import android.text.TextUtils;
import android.util.Log;
import com.android.inputmethod.latin.Suggest;
import com.android.inputmethod.latin.UserBigramDictionary;
import com.android.inputmethod.latin.WordComposer;
import java.io.IOException;
@ -29,28 +29,32 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
public class SuggestHelper {
private Suggest mSuggest;
private UserBigramDictionary mUserBigram;
private final String TAG;
/** Uses main dictionary only **/
public SuggestHelper(String tag, Context context, int[] resId) {
TAG = tag;
InputStream[] res = null;
InputStream[] is = null;
try {
// merging separated dictionary into one if dictionary is separated
int total = 0;
res = new InputStream[resId.length];
is = new InputStream[resId.length];
for (int i = 0; i < resId.length; i++) {
res[i] = context.getResources().openRawResource(resId[i]);
total += res[i].available();
is[i] = context.getResources().openRawResource(resId[i]);
total += is[i].available();
}
ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
int got = 0;
for (int i = 0; i < resId.length; i++) {
got += Channels.newChannel(res[i]).read(byteBuffer);
got += Channels.newChannel(is[i]).read(byteBuffer);
}
if (got != total) {
Log.w(TAG, "Read " + got + " bytes, expected " + total);
@ -62,8 +66,10 @@ public class SuggestHelper {
Log.w(TAG, "No available memory for binary dictionary");
} finally {
try {
for (int i = 0;i < res.length; i++) {
res[i].close();
if (is != null) {
for (int i = 0; i < is.length; i++) {
is[i].close();
}
}
} catch (IOException e) {
Log.w(TAG, "Failed to close input stream");
@ -73,6 +79,27 @@ public class SuggestHelper {
mSuggest.setCorrectionMode(Suggest.CORRECTION_FULL_BIGRAM);
}
/** Uses both main dictionary and user-bigram dictionary **/
public SuggestHelper(String tag, Context context, int[] resId, int userBigramMax,
int userBigramDelete) {
this(tag, context, resId);
mUserBigram = new UserBigramDictionary(context, null, Locale.US.toString(),
Suggest.DIC_USER);
mUserBigram.setDatabaseMax(userBigramMax);
mUserBigram.setDatabaseDelete(userBigramDelete);
mSuggest.setUserBigramDictionary(mUserBigram);
}
/**
 * Flushes and closes the current user-bigram dictionary and replaces it with a new one
 * for {@code locale}. Does nothing when no user-bigram dictionary is in use.
 */
void changeUserBigramLocale(Context context, Locale locale) {
    if (mUserBigram == null) {
        return;
    }
    flushUserBigrams();
    mUserBigram.close();
    final String localeName = locale.toString();
    mUserBigram = new UserBigramDictionary(context, null, localeName, Suggest.DIC_USER);
    mSuggest.setUserBigramDictionary(mUserBigram);
}
private WordComposer createWordComposer(CharSequence s) {
WordComposer word = new WordComposer();
for (int i = 0; i < s.length(); i++) {
@ -125,8 +152,8 @@ public class SuggestHelper {
}
private void getBigramSuggestions(CharSequence previous, CharSequence typed) {
if(!TextUtils.isEmpty(previous) && (typed.length() > 1)) {
WordComposer firstChar = createWordComposer(typed.charAt(0) + "");
if (!TextUtils.isEmpty(previous) && (typed.length() > 1)) {
WordComposer firstChar = createWordComposer(Character.toString(typed.charAt(0)));
mSuggest.getSuggestions(null, firstChar, false, previous);
}
}
@ -162,6 +189,54 @@ public class SuggestHelper {
return mSuggest.isValidWord(typed);
}
/**
 * Checks whether {@code expected} is among the bigram suggestions after {@code previous}
 * has been typed followed by the single character {@code typed}.
 * Pending user bigrams are flushed to the database first, and a dictionary reload that
 * is in progress is waited for before the user bigrams are queried.
 * @return true if {@code expected} is suggested; false otherwise, or when no
 *         user-bigram dictionary is in use
 */
boolean isUserBigramSuggestion(CharSequence previous, char typed,
        CharSequence expected) {
    if (mUserBigram == null) return false;

    flushUserBigrams();
    // A single typed character can never be an empty string, so only the previous
    // word needs to be checked.
    if (!TextUtils.isEmpty(previous)) {
        WordComposer firstChar = createWordComposer(Character.toString(typed));
        mSuggest.getSuggestions(null, firstChar, false, previous);
        boolean reloading = mUserBigram.reloadDictionaryIfRequired();
        if (reloading) mUserBigram.waitForDictionaryLoading();
        mUserBigram.getBigrams(firstChar, previous, mSuggest, null);
    }

    List<CharSequence> suggestions = mSuggest.mBigramSuggestions;
    for (CharSequence suggestion : suggestions) {
        if (TextUtils.equals(suggestion, expected)) return true;
    }
    return false;
}
/**
 * Splits {@code sentence} on whitespace and adds every pair of neighbouring words to
 * the user-bigram dictionary. A sentence with fewer than two words adds nothing.
 */
void addToUserBigram(String sentence) {
    final StringTokenizer tokens = new StringTokenizer(sentence);
    if (!tokens.hasMoreTokens()) {
        return;
    }
    String left = tokens.nextToken();
    while (tokens.hasMoreTokens()) {
        final String right = tokens.nextToken();
        addToUserBigram(new String[] {left, right});
        left = right;
    }
}
/**
 * Adds one pair of words to the user-bigram dictionary. Ignored when no user-bigram
 * dictionary is in use or when {@code pair} does not hold exactly two words.
 */
void addToUserBigram(String[] pair) {
    if (mUserBigram == null || pair.length != 2) {
        return;
    }
    final String first = pair[0];
    final String second = pair[1];
    mUserBigram.addBigrams(first, second);
}
/**
 * Writes the pending user bigrams to the database and waits until the write is done.
 * Does nothing when no user-bigram dictionary is in use.
 */
void flushUserBigrams() {
    if (mUserBigram == null) {
        return;
    }
    mUserBigram.flushPendingWrites();
    mUserBigram.waitUntilUpdateDBDone();
}
final int[][] adjacents = {
{'a','s','w','q',-1},
{'b','h','v','n','g','j',-1},

View file

@ -14,16 +14,15 @@
* the License.
*/
package com.android.inputmethod.latin.tests;
package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.util.Log;
import com.android.inputmethod.latin.tests.R;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
public class SuggestPerformanceTests extends AndroidTestCase {
private static final String TAG = "SuggestPerformanceTests";
@ -122,6 +121,6 @@ public class SuggestPerformanceTests extends AndroidTestCase {
* Check the log for detail
*/
public void testSuggestPerformance() {
assertTrue(runText(false) < runText(true));
assertTrue(runText(false) <= runText(true));
}
}

View file

@ -14,10 +14,10 @@
* the License.
*/
package com.android.inputmethod.latin.tests;
package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import android.util.Log;
import com.android.inputmethod.latin.tests.R;
public class SuggestTests extends AndroidTestCase {
private static final String TAG = "SuggestTests";

View file

@ -0,0 +1,100 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin;
import android.test.AndroidTestCase;
import com.android.inputmethod.latin.tests.R;
import java.util.Locale;
/**
 * Tests for the user-bigram dictionary, driven through SuggestHelper with a small
 * database limit (MAX_DATA pairs, DELETE_DATA removed per pruning).
 */
public class UserBigramTests extends AndroidTestCase {
    private static final String TAG = "UserBigramTests";

    /** Number of times a pair has to be added before it is suggested. */
    private static final int SUGGESTION_STARTS = 6;
    private static final int MAX_DATA = 20;
    private static final int DELETE_DATA = 10;

    final String[] pair1 = new String[] {"user", "bigram"};
    final String[] pair2 = new String[] {"android","platform"};
    final String[] pair3 = new String[] {"locale", "france"};
    final String sentence0 = "Hello world";
    final String sentence1 = "This is a test for user input based bigram";
    final String sentence2 = "It learns phrases that contain both dictionary and nondictionary "
            + "words";
    final String sentence3 = "This should give better suggestions than the previous version";
    final String sentence4 = "Android stock keyboard is improving";

    private SuggestHelper mHelper;

    @Override
    protected void setUp() {
        final int[] dictionaryResources = { R.raw.test };
        mHelper = new SuggestHelper(TAG, getTestContext(), dictionaryResources,
                MAX_DATA, DELETE_DATA);
    }

    /** Adds the same pair {@code times} times. */
    private void addPairRepeatedly(String[] pair, int times) {
        for (int n = 0; n < times; n++) {
            mHelper.addToUserBigram(pair);
        }
    }

    /** Adds every neighbouring pair of the sentence {@code times} times. */
    private void addSentenceRepeatedly(String sentence, int times) {
        for (int n = 0; n < times; n++) {
            mHelper.addToUserBigram(sentence);
        }
    }

    /************************** Tests ************************/

    /**
     * A pair is suggested once it has been added SUGGESTION_STARTS times, and not before.
     */
    public void testUserBigram() {
        addPairRepeatedly(pair1, SUGGESTION_STARTS);
        addPairRepeatedly(pair2, SUGGESTION_STARTS - 1);

        assertTrue(mHelper.isUserBigramSuggestion("user", 'b', "bigram"));
        assertFalse(mHelper.isUserBigramSuggestion("android", 'p', "platform"));
    }

    /**
     * Only the bigrams of the current locale are loaded.
     */
    public void testOpenAndClose() {
        addPairRepeatedly(pair1, SUGGESTION_STARTS);
        assertTrue(mHelper.isUserBigramSuggestion("user", 'b', "bigram"));

        // change to fr_FR
        mHelper.changeUserBigramLocale(getTestContext(), Locale.FRANCE);
        addPairRepeatedly(pair3, SUGGESTION_STARTS);
        assertTrue(mHelper.isUserBigramSuggestion("locale", 'f', "france"));
        assertFalse(mHelper.isUserBigramSuggestion("user", 'b', "bigram"));

        // change back to en_US
        mHelper.changeUserBigramLocale(getTestContext(), Locale.US);
        assertFalse(mHelper.isUserBigramSuggestion("locale", 'f', "france"));
        assertTrue(mHelper.isUserBigramSuggestion("user", 'b', "bigram"));
    }

    /**
     * Data gets pruned once the database holds more than the maximum.
     */
    public void testPruningData() {
        addSentenceRepeatedly(sentence0, SUGGESTION_STARTS);
        mHelper.flushUserBigrams();
        assertTrue(mHelper.isUserBigramSuggestion("Hello", 'w', "world"));

        mHelper.addToUserBigram(sentence1);
        mHelper.addToUserBigram(sentence2);
        assertTrue(mHelper.isUserBigramSuggestion("Hello", 'w', "world"));

        // pruning should happen
        mHelper.addToUserBigram(sentence3);
        mHelper.addToUserBigram(sentence4);

        // trying to reopen database to check pruning happened in database
        mHelper.changeUserBigramLocale(getTestContext(), Locale.US);
        assertFalse(mHelper.isUserBigramSuggestion("Hello", 'w', "world"));
    }
}