/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.android.inputmethod.latin; import android.content.ContentValues; import android.content.Context; import android.database.Cursor; import android.database.sqlite.SQLiteDatabase; import android.database.sqlite.SQLiteOpenHelper; import android.database.sqlite.SQLiteQueryBuilder; import android.os.AsyncTask; import android.provider.BaseColumns; import android.util.Log; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; /** * Stores all the pairs user types in databases. Prune the database if the size * gets too big. Unlike AutoDictionary, it even stores the pairs that are already * in the dictionary. */ public class UserBigramDictionary extends ExpandableDictionary { private static final String TAG = "UserBigramDictionary"; /** Any pair being typed or picked */ private static final int FREQUENCY_FOR_TYPED = 2; /** Maximum frequency for all pairs */ private static final int FREQUENCY_MAX = 127; /** Maximum number of pairs. Pruning will start when databases goes above this number. */ private static int sMaxUserBigrams = 10000; /** * When it hits maximum bigram pair, it will delete until you are left with * only (sMaxUserBigrams - sDeleteUserBigrams) pairs. * Do not keep this number small to avoid deleting too often. */ private static int sDeleteUserBigrams = 1000; /** * Database version should increase if the database structure changes */ private static final int DATABASE_VERSION = 1; private static final String DATABASE_NAME = "userbigram_dict.db"; /** Name of the words table in the database */ private static final String MAIN_TABLE_NAME = "main"; // TODO: Consume less space by using a unique id for locale instead of the whole // 2-5 character string. (Same TODO from AutoDictionary) private static final String MAIN_COLUMN_ID = BaseColumns._ID; private static final String MAIN_COLUMN_WORD1 = "word1"; private static final String MAIN_COLUMN_WORD2 = "word2"; private static final String MAIN_COLUMN_LOCALE = "locale"; /** Name of the frequency table in the database */ private static final String FREQ_TABLE_NAME = "frequency"; private static final String FREQ_COLUMN_ID = BaseColumns._ID; private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; private static final String FREQ_COLUMN_FREQUENCY = "freq"; private final LatinIME mIme; /** Locale for which this auto dictionary is storing words */ private String mLocale; private HashSet mPendingWrites = new HashSet(); private final Object mPendingWritesLock = new Object(); private static volatile boolean sUpdatingDB = false; private final static HashMap sDictProjectionMap; static { sDictProjectionMap = new HashMap(); sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); } private static DatabaseHelper sOpenHelper = null; private static class Bigram { public final String mWord1; public final String mWord2; public final int frequency; Bigram(String word1, String word2, int frequency) { this.mWord1 = word1; this.mWord2 = word2; this.frequency = frequency; } @Override public boolean equals(Object bigram) { Bigram bigram2 = (Bigram) bigram; return (mWord1.equals(bigram2.mWord1) && mWord2.equals(bigram2.mWord2)); } @Override public int hashCode() { return (mWord1 + " " + mWord2).hashCode(); } } public void setDatabaseMax(int maxUserBigram) { sMaxUserBigrams = maxUserBigram; } public void setDatabaseDelete(int deleteUserBigram) { sDeleteUserBigrams = deleteUserBigram; } public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { super(context, dicTypeId); mIme = ime; mLocale = locale; if (sOpenHelper == null) { sOpenHelper = new DatabaseHelper(getContext()); } if (mLocale != null && mLocale.length() > 1) { loadDictionary(); } } @Override public void close() { flushPendingWrites(); // Don't close the database as locale changes will require it to be reopened anyway // Also, the database is written to somewhat frequently, so it needs to be kept alive // throughout the life of the process. // mOpenHelper.close(); super.close(); } /** * Pair will be added to the userbigram database. */ public int addBigrams(String word1, String word2) { // remove caps if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) { word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); } // Do not insert a word as a bigram of itself if (word1.equals(word2)) { return 0; } int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; synchronized (mPendingWritesLock) { if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { mPendingWrites.add(new Bigram(word1, word2, freq)); } else { Bigram bi = new Bigram(word1, word2, freq); mPendingWrites.remove(bi); mPendingWrites.add(bi); } } return freq; } /** * Schedules a background thread to write any pending words to the database. */ public void flushPendingWrites() { synchronized (mPendingWritesLock) { // Nothing pending? Return if (mPendingWrites.isEmpty()) return; // Create a background thread to write the pending entries new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute(); // Create a new map for writing new entries into while the old one is written to db mPendingWrites = new HashSet(); } } /** Used for testing purpose **/ void waitUntilUpdateDBDone() { synchronized (mPendingWritesLock) { while (sUpdatingDB) { try { Thread.sleep(100); } catch (InterruptedException e) { } } return; } } @Override public void loadDictionaryAsync() { // Load the words that correspond to the current input locale Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); try { if (cursor.moveToFirst()) { int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); while (!cursor.isAfterLast()) { String word1 = cursor.getString(word1Index); String word2 = cursor.getString(word2Index); int frequency = cursor.getInt(frequencyIndex); // Safeguard against adding really long words. Stack may overflow due // to recursive lookup if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) { super.setBigram(word1, word2, frequency); } cursor.moveToNext(); } } } finally { cursor.close(); } } /** * Query the database */ private Cursor query(String selection, String[] selectionArgs) { SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); // main INNER JOIN frequency ON (main._id=freq.pair_id) qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." + FREQ_COLUMN_PAIR_ID +")"); qb.setProjectionMap(sDictProjectionMap); // Get the database and run the query SQLiteDatabase db = sOpenHelper.getReadableDatabase(); Cursor c = qb.query(db, new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, selection, selectionArgs, null, null, null); return c; } /** * This class helps open, create, and upgrade the database file. */ private static class DatabaseHelper extends SQLiteOpenHelper { DatabaseHelper(Context context) { super(context, DATABASE_NAME, null, DATABASE_VERSION); } @Override public void onCreate(SQLiteDatabase db) { db.execSQL("PRAGMA foreign_keys = ON;"); db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," + MAIN_COLUMN_WORD1 + " TEXT," + MAIN_COLUMN_WORD2 + " TEXT," + MAIN_COLUMN_LOCALE + " TEXT" + ");"); db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," + FREQ_COLUMN_PAIR_ID + " INTEGER," + FREQ_COLUMN_FREQUENCY + " INTEGER," + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" + ");"); } @Override public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { Log.w(TAG, "Upgrading database from version " + oldVersion + " to " + newVersion + ", which will destroy all old data"); db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); onCreate(db); } } /** * Async task to write pending words to the database so that it stays in sync with * the in-memory trie. */ private static class UpdateDbTask extends AsyncTask { private final HashSet mMap; private final DatabaseHelper mDbHelper; private final String mLocale; public UpdateDbTask(Context context, DatabaseHelper openHelper, HashSet pendingWrites, String locale) { mMap = pendingWrites; mLocale = locale; mDbHelper = openHelper; } /** Prune any old data if the database is getting too big. */ private void checkPruneData(SQLiteDatabase db) { db.execSQL("PRAGMA foreign_keys = ON;"); Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, null, null, null, null, null); try { int totalRowCount = c.getCount(); // prune out old data if we have too much data if (totalRowCount > sMaxUserBigrams) { int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams; int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); c.moveToFirst(); int count = 0; while (count < numDeleteRows && !c.isAfterLast()) { String pairId = c.getString(pairIdColumnId); // Deleting from MAIN table will delete the frequencies // due to FOREIGN KEY .. ON DELETE CASCADE db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", new String[] { pairId }); c.moveToNext(); count++; } } } finally { c.close(); } } @Override protected void onPreExecute() { sUpdatingDB = true; } @Override protected Void doInBackground(Void... v) { SQLiteDatabase db = mDbHelper.getWritableDatabase(); db.execSQL("PRAGMA foreign_keys = ON;"); // Write all the entries to the db Iterator iterator = mMap.iterator(); while (iterator.hasNext()) { Bigram bi = iterator.next(); // find pair id Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " + MAIN_COLUMN_LOCALE + "=?", new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, null); int pairId; if (c.moveToFirst()) { // existing pair pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", new String[] { Integer.toString(pairId) }); } else { // new pair Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, getContentValues(bi.mWord1, bi.mWord2, mLocale)); pairId = pairIdLong.intValue(); } c.close(); // insert new frequency db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency)); } checkPruneData(db); sUpdatingDB = false; return null; } private ContentValues getContentValues(String word1, String word2, String locale) { ContentValues values = new ContentValues(3); values.put(MAIN_COLUMN_WORD1, word1); values.put(MAIN_COLUMN_WORD2, word2); values.put(MAIN_COLUMN_LOCALE, locale); return values; } private ContentValues getFrequencyContentValues(int pairId, int frequency) { ContentValues values = new ContentValues(2); values.put(FREQ_COLUMN_PAIR_ID, pairId); values.put(FREQ_COLUMN_FREQUENCY, frequency); return values; } } }