From 4d289d39aeae21064f63d958974816ceee3e9fde Mon Sep 17 00:00:00 2001 From: Tom Ouyang Date: Thu, 26 Apr 2012 23:50:21 -0700 Subject: [PATCH] Contacts dictionary rebuilds only when contact names have changed. Bug: 6396600 Change-Id: Iad693ec4bab6351793d624e5c5b0a9f5c12a60e3 --- .../inputmethod/latin/BinaryDictionary.java | 11 ++ .../latin/ContactsBinaryDictionary.java | 137 ++++++++++++++++-- .../latin/ExpandableBinaryDictionary.java | 79 +++++++--- ...oid_inputmethod_latin_BinaryDictionary.cpp | 15 ++ native/jni/src/bigram_dictionary.cpp | 22 ++- native/jni/src/bigram_dictionary.h | 1 + native/jni/src/dictionary.cpp | 5 + native/jni/src/dictionary.h | 1 + 8 files changed, 241 insertions(+), 30 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index a644ec0d9..cc20f4294 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin; import android.content.Context; +import android.text.TextUtils; import com.android.inputmethod.keyboard.ProximityInfo; @@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary { int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); private native void closeNative(long dict); private native boolean isValidWordNative(long dict, int[] word, int wordLength); + private native boolean isValidBigramNative(long dict, int[] word1, int[] word2); private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, boolean useFullEditDistance, char[] outputChars, int[] scores); @@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary { return isValidWordNative(mNativeDict, chars, chars.length); } + // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni + // calls when checking for changes in an entire dictionary. + public boolean isValidBigram(CharSequence word1, CharSequence word2) { + if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; + int[] chars1 = StringUtils.toCodePointArray(word1.toString()); + int[] chars2 = StringUtils.toCodePointArray(word2.toString()); + return isValidBigramNative(mNativeDict, chars1, chars2); + } + @Override public synchronized void close() { closeInternal(); diff --git a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java index 65f97e987..22787c218 100644 --- a/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ContactsBinaryDictionary.java @@ -18,6 +18,7 @@ import android.content.ContentResolver; import android.content.Context; import android.database.ContentObserver; import android.database.Cursor; +import android.os.SystemClock; import android.provider.BaseColumns; import android.provider.ContactsContract.Contacts; import android.text.TextUtils; @@ -30,18 +31,27 @@ import java.util.Locale; public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,}; + private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID}; private static final String TAG = ContactsBinaryDictionary.class.getSimpleName(); private static final String NAME = "contacts"; + private static boolean DEBUG = false; + /** * Frequency for contacts information into the dictionary */ private static final int FREQUENCY_FOR_CONTACTS = 40; private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90; + /** The maximum number of contacts that this dictionary supports. */ + private static final int MAX_CONTACT_COUNT = 10000; + private static final int INDEX_NAME = 1; + /** The number of contacts in the most recent dictionary rebuild. */ + static private int sContactCountAtLastRebuild = 0; + private ContentObserver mObserver; /** @@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { if (cursor != null) { try { if (cursor.moveToFirst()) { + sContactCountAtLastRebuild = getContactCount(); addWords(cursor); } } finally { @@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { private void addWords(Cursor cursor) { clearFusionDictionary(); - while (!cursor.isAfterLast()) { + int count = 0; + while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) { String name = cursor.getString(INDEX_NAME); - if (name != null && -1 == name.indexOf('@')) { + if (isValidName(name)) { addName(name); + ++count; } cursor.moveToNext(); } } + private int getContactCount() { + // TODO: consider switching to a rawQuery("select count(*)...") on the database if + // performance is a bottleneck. + final Cursor cursor = mContext.getContentResolver().query( + Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null); + if (cursor != null) { + return cursor.getCount(); + } + return 0; + } + /** * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their * bigrams depending on locale. @@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { // TODO: Better tokenization for non-Latin writing systems for (int i = 0; i < len; i++) { if (Character.isLetter(name.codePointAt(i))) { - int j; - for (j = i + 1; j < len; j++) { - final int codePoint = name.codePointAt(j); - if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE - || Character.isLetter(codePoint))) { - break; - } - } - String word = name.substring(i, j); - i = j - 1; + int end = getWordEndPosition(name, len, i); + String word = name.substring(i, end); + i = end - 1; // Don't add single letter words, possibly confuses // capitalization of i. final int wordLen = word.codePointCount(0, word.length()); @@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { } } } + + /** + * Returns the index of the last letter in the word, starting from position startIndex. + */ + private static int getWordEndPosition(String string, int len, int startIndex) { + int end; + int cp = 0; + for (end = startIndex + 1; end < len; end += Character.charCount(cp)) { + cp = string.codePointAt(end); + if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE + || Character.isLetter(cp))) { + break; + } + } + return end; + } + + @Override + protected boolean hasContentChanged() { + final long startTime = SystemClock.uptimeMillis(); + final int contactCount = getContactCount(); + if (contactCount > MAX_CONTACT_COUNT) { + // If there are too many contacts then return false. In this rare case it is impossible + // to include all of them anyways and the cost of rebuilding the dictionary is too high. + // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts? + return false; + } + if (contactCount != sContactCountAtLastRebuild) { + return true; + } + // Check all contacts since it's not possible to find out which names have changed. + // This is needed because it's possible to receive extraneous onChange events even when no + // name has changed. + Cursor cursor = mContext.getContentResolver().query( + Contacts.CONTENT_URI, PROJECTION, null, null, null); + if (cursor != null) { + try { + if (cursor.moveToFirst()) { + while (!cursor.isAfterLast()) { + String name = cursor.getString(INDEX_NAME); + if (isValidName(name) && !isNameInDictionary(name)) { + if (DEBUG) { + Log.d(TAG, "Contact name missing: " + name + " (runtime = " + + (SystemClock.uptimeMillis() - startTime) + " ms)"); + } + return true; + } + cursor.moveToNext(); + } + } + } finally { + cursor.close(); + } + } + if (DEBUG) { + Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime) + + " ms)"); + } + return false; + } + + private static boolean isValidName(String name) { + if (name != null && -1 == name.indexOf('@')) { + return true; + } + return false; + } + + /** + * Checks if the words in a name are in the current binary dictionary. + */ + private boolean isNameInDictionary(String name) { + int len = name.codePointCount(0, name.length()); + String prevWord = null; + for (int i = 0; i < len; i++) { + if (Character.isLetter(name.codePointAt(i))) { + int end = getWordEndPosition(name, len, i); + String word = name.substring(i, end); + i = end - 1; + final int wordLen = word.codePointCount(0, word.length()); + if (wordLen < MAX_WORD_LENGTH && wordLen > 1) { + if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) { + if (!super.isValidBigramLocked(prevWord, word)) { + return false; + } + } else { + if (!super.isValidWordLocked(word)) { + return false; + } + } + prevWord = word; + } + } + } + return true; + } } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index 3d89226c0..22d8f24f1 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -95,6 +95,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { */ protected abstract void loadDictionaryAsync(); + /** + * Indicates that the source dictionary content has changed and a rebuild of the binary file is + * required. If it returns false, the next reload will only read the current binary dictionary + * from file. Note that the shared binary dictionary is locked when this is called. + */ + protected abstract boolean hasContentChanged(); + /** * Gets the shared dictionary controller for the given filename. */ @@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { * the native side. */ public void clearFusionDictionary() { - mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions( - new HashMap(), false, false)); + mFusionDictionary = new FusionDictionary(new Node(), + new FusionDictionary.DictionaryOptions(new HashMap(), false, + false)); } /** @@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { protected boolean isValidWordInner(final CharSequence word) { if (mLocalDictionaryController.tryLock()) { try { - if (mBinaryDictionary != null) { - return mBinaryDictionary.isValidWord(word); - } + return isValidWordLocked(word); } finally { mLocalDictionaryController.unlock(); } @@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { return false; } + protected boolean isValidWordLocked(final CharSequence word) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidWord(word); + } + + protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidBigram(word1, word2); + } + + protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) { + if (mLocalDictionaryController.tryLock()) { + try { + return isValidBigramLocked(word1, word2); + } finally { + mLocalDictionaryController.unlock(); + } + } + return false; + } + + protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) { + if (mBinaryDictionary == null) return false; + return mBinaryDictionary.isValidBigram(word1, word2); + } + /** * Load the current binary dictionary from internal storage in a background thread. If no binary * dictionary exists, this method will generate one. @@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } /** - * Sets whether or not the dictionary is out of date and requires a reload. + * Marks that the dictionary is out of date and requires a reload. + * + * @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild + * of the binary file is required. If not true, the next reload process will only read + * the current binary dictionary from file. */ - protected void setRequiresReload(final boolean reload) { - final long time = reload ? SystemClock.uptimeMillis() : 0; - mSharedDictionaryController.mLastUpdateRequestTime = time; + protected void setRequiresReload(final boolean requiresRebuild) { + final long time = SystemClock.uptimeMillis(); mLocalDictionaryController.mLastUpdateRequestTime = time; + mSharedDictionaryController.mLastUpdateRequestTime = time; if (DEBUG) { Log.d(TAG, "Reload request: request=" + time + " update=" + mSharedDictionaryController.mLastUpdateTime); @@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) { // If the shared dictionary file does not exist or is out of date, the first // instance that acquires the lock will generate a new one. - mSharedDictionaryController.mLastUpdateTime = time; - mLocalDictionaryController.mLastUpdateTime = time; - generateBinaryDictionary(); - loadBinaryDictionary(); - } else if (mLocalDictionaryController.isOutOfDate()) { - // Otherwise, if only the local dictionary for this instance is out of date, load - // the shared dictionary from file. - mLocalDictionaryController.mLastUpdateTime = time; + if (hasContentChanged()) { + // If the source content has changed, rebuild the binary dictionary. + mSharedDictionaryController.mLastUpdateTime = time; + generateBinaryDictionary(); + loadBinaryDictionary(); + } else { + // If not, the reload request was unnecessary so revert LastUpdateRequestTime + // to LastUpdateTime. + mSharedDictionaryController.mLastUpdateRequestTime = + mSharedDictionaryController.mLastUpdateTime; + } + } else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime + < mSharedDictionaryController.mLastUpdateTime) { + // Otherwise, if the local dictionary is older than the shared dictionary, load the + // shared dictionary. loadBinaryDictionary(); } + mLocalDictionaryController.mLastUpdateTime = time; } finally { mSharedDictionaryController.unlock(); } } + // TODO: cache the file's existence so that we avoid doing a disk access each time. private boolean dictionaryFileExists() { final File file = new File(mContext.getFilesDir(), mFilename); return file.exists(); diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index de9dbf9fd..b8f4ec77a 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -182,6 +182,20 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec return result; } +static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict, + jintArray wordArray1, jintArray wordArray2) { + Dictionary *dictionary = (Dictionary*)dict; + if (!dictionary) return (jboolean) false; + jint *word1 = env->GetIntArrayElements(wordArray1, 0); + jint *word2 = env->GetIntArrayElements(wordArray2, 0); + jsize length1 = word1 ? env->GetArrayLength(wordArray1) : 0; + jsize length2 = word2 ? env->GetArrayLength(wordArray2) : 0; + jboolean result = dictionary->isValidBigram(word1, length1, word2, length2); + env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT); + env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT); + return result; +} + static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) { jchar *beforeChars = env->GetCharArrayElements(before, 0); @@ -239,6 +253,7 @@ static JNINativeMethod sMethods[] = { {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram}, {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)D", (void*)latinime_BinaryDictionary_calcNormalizedScore}, diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 07031086c..7ed4dc439 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -128,7 +128,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in ++bigramCount; } } - } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); + } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); return bigramCount; } @@ -189,5 +189,25 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) { return false; } +bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, + int length2) { + const uint8_t* const root = DICT; + int pos = getBigramListPositionForWord(word1, length1); + // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams + if (0 == pos) return false; + int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2); + if (NOT_VALID_WORD == nextWordPos) return false; + int bigramFlags; + do { + bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, + &pos); + if (bigramPos == nextWordPos) { + return true; + } + } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags); + return false; +} + // TODO: Move functions related to bigram to here } // namespace latinime diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 7328d5828..b8763a515 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -33,6 +33,7 @@ class BigramDictionary { int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength); void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, std::map *map, uint8_t *filter); + bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); ~BigramDictionary(); private: bool addWordBigram(unsigned short *word, int length, int frequency); diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 9dc207223..8ea7c49fa 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -58,4 +58,9 @@ bool Dictionary::isValidWord(const int32_t *word, int length) { return mUnigramDictionary->isValidWord(word, length); } +bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, + int length2) { + return mBigramDictionary->isValidBigram(word1, length1, word2, length2); +} + } // namespace latinime diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index bce86d1ad..87891ee4d 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -53,6 +53,7 @@ class Dictionary { } bool isValidWord(const int32_t *word, int length); + bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; }