Merge "Contacts dictionary rebuilds only when contact names have changed." into jb-dev

This commit is contained in:
Tom Ouyang 2012-05-14 14:34:22 -07:00 committed by Android (Google) Code Review
commit 5282f00ff8
8 changed files with 241 additions and 30 deletions

View file

@ -17,6 +17,7 @@
package com.android.inputmethod.latin; package com.android.inputmethod.latin;
import android.content.Context; import android.content.Context;
import android.text.TextUtils;
import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.keyboard.ProximityInfo;
@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary {
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
private native void closeNative(long dict); private native void closeNative(long dict);
private native boolean isValidWordNative(long dict, int[] word, int wordLength); private native boolean isValidWordNative(long dict, int[] word, int wordLength);
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
boolean useFullEditDistance, char[] outputChars, int[] scores); boolean useFullEditDistance, char[] outputChars, int[] scores);
@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary {
return isValidWordNative(mNativeDict, chars, chars.length); return isValidWordNative(mNativeDict, chars, chars.length);
} }
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
// calls when checking for changes in an entire dictionary.
/**
 * Asks the native dictionary whether (word1, word2) is a valid bigram.
 *
 * Both words are converted to code point arrays before being handed to the native side.
 *
 * @param word1 the first word of the pair
 * @param word2 the second word of the pair
 * @return false if either word is null or empty; otherwise the result of the native lookup
 */
public boolean isValidBigram(CharSequence word1, CharSequence word2) {
    final boolean hasBothWords = !TextUtils.isEmpty(word1) && !TextUtils.isEmpty(word2);
    if (!hasBothWords) {
        return false;
    }
    final int[] firstCodePoints = StringUtils.toCodePointArray(word1.toString());
    final int[] secondCodePoints = StringUtils.toCodePointArray(word2.toString());
    return isValidBigramNative(mNativeDict, firstCodePoints, secondCodePoints);
}
@Override @Override
public synchronized void close() { public synchronized void close() {
closeInternal(); closeInternal();

View file

@ -18,6 +18,7 @@ import android.content.ContentResolver;
import android.content.Context; import android.content.Context;
import android.database.ContentObserver; import android.database.ContentObserver;
import android.database.Cursor; import android.database.Cursor;
import android.os.SystemClock;
import android.provider.BaseColumns; import android.provider.BaseColumns;
import android.provider.ContactsContract.Contacts; import android.provider.ContactsContract.Contacts;
import android.text.TextUtils; import android.text.TextUtils;
@ -30,18 +31,27 @@ import java.util.Locale;
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary { public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,}; private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
private static final String TAG = ContactsBinaryDictionary.class.getSimpleName(); private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
private static final String NAME = "contacts"; private static final String NAME = "contacts";
private static boolean DEBUG = false;
/** /**
* Frequency for contacts information into the dictionary * Frequency for contacts information into the dictionary
*/ */
private static final int FREQUENCY_FOR_CONTACTS = 40; private static final int FREQUENCY_FOR_CONTACTS = 40;
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90; private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
/** The maximum number of contacts that this dictionary supports. */
private static final int MAX_CONTACT_COUNT = 10000;
private static final int INDEX_NAME = 1; private static final int INDEX_NAME = 1;
/** The number of contacts in the most recent dictionary rebuild. */
static private int sContactCountAtLastRebuild = 0;
private ContentObserver mObserver; private ContentObserver mObserver;
/** /**
@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
if (cursor != null) { if (cursor != null) {
try { try {
if (cursor.moveToFirst()) { if (cursor.moveToFirst()) {
sContactCountAtLastRebuild = getContactCount();
addWords(cursor); addWords(cursor);
} }
} finally { } finally {
@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
private void addWords(Cursor cursor) { private void addWords(Cursor cursor) {
clearFusionDictionary(); clearFusionDictionary();
while (!cursor.isAfterLast()) { int count = 0;
while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
String name = cursor.getString(INDEX_NAME); String name = cursor.getString(INDEX_NAME);
if (name != null && -1 == name.indexOf('@')) { if (isValidName(name)) {
addName(name); addName(name);
++count;
} }
cursor.moveToNext(); cursor.moveToNext();
} }
} }
/**
 * Returns the number of rows returned by a query on Contacts.CONTENT_URI, or 0 if the query
 * does not produce a cursor.
 */
private int getContactCount() {
    // TODO: consider switching to a rawQuery("select count(*)...") on the database if
    // performance is a bottleneck.
    final Cursor cursor = mContext.getContentResolver().query(
            Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
    if (cursor == null) {
        return 0;
    }
    try {
        return cursor.getCount();
    } finally {
        // This method is called on every rebuild and every change check, so the cursor must
        // be released here; otherwise each call leaks one open cursor.
        cursor.close();
    }
}
/** /**
* Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
* bigrams depending on locale. * bigrams depending on locale.
@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
// TODO: Better tokenization for non-Latin writing systems // TODO: Better tokenization for non-Latin writing systems
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
if (Character.isLetter(name.codePointAt(i))) { if (Character.isLetter(name.codePointAt(i))) {
int j; int end = getWordEndPosition(name, len, i);
for (j = i + 1; j < len; j++) { String word = name.substring(i, end);
final int codePoint = name.codePointAt(j); i = end - 1;
if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE
|| Character.isLetter(codePoint))) {
break;
}
}
String word = name.substring(i, j);
i = j - 1;
// Don't add single letter words, possibly confuses // Don't add single letter words, possibly confuses
// capitalization of i. // capitalization of i.
final int wordLen = word.codePointCount(0, word.length()); final int wordLen = word.codePointCount(0, word.length());
@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
} }
} }
} }
/**
 * Returns the exclusive end index of the word that starts at startIndex, i.e. the index just
 * past the word's last character, suitable for {@code string.substring(startIndex, end)}.
 *
 * After the first code point, the word continues for as long as each following code point is
 * a letter, Keyboard.CODE_DASH or Keyboard.CODE_SINGLE_QUOTE, and the index is below len.
 *
 * @param string the text being tokenized
 * @param len the index at which scanning stops
 * @param startIndex the index of the first character of the word
 * @return the index one past the last character of the word
 */
private static int getWordEndPosition(String string, int len, int startIndex) {
    // Step over the whole first code point. Advancing by a single char would split a surrogate
    // pair when the word starts with a supplementary-plane letter. An out-of-range startIndex
    // keeps the plain +1 step so that no new exception is introduced.
    final boolean startInRange = startIndex >= 0 && startIndex < string.length();
    int end = startIndex
            + (startInRange ? Character.charCount(string.codePointAt(startIndex)) : 1);
    while (end < len) {
        final int cp = string.codePointAt(end);
        final boolean isWordCodePoint = cp == Keyboard.CODE_DASH
                || cp == Keyboard.CODE_SINGLE_QUOTE || Character.isLetter(cp);
        if (!isWordCodePoint) {
            break;
        }
        end += Character.charCount(cp);
    }
    return end;
}
/**
 * Reports whether the contact names differ from what the current binary dictionary holds.
 *
 * Returns false when there are more than MAX_CONTACT_COUNT contacts, true when the contact
 * count differs from the count recorded at the last rebuild, and otherwise true only if some
 * valid contact name is not found in the dictionary.
 */
@Override
protected boolean hasContentChanged() {
    final long startTime = SystemClock.uptimeMillis();
    final int contactCount = getContactCount();
    if (contactCount > MAX_CONTACT_COUNT) {
        // If there are too many contacts then return false. In this rare case it is impossible
        // to include all of them anyways and the cost of rebuilding the dictionary is too high.
        // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
        return false;
    }
    if (contactCount != sContactCountAtLastRebuild) {
        return true;
    }
    // Check all contacts since it's not possible to find out which names have changed.
    // This is needed because it's possible to receive extraneous onChange events even when no
    // name has changed.
    final Cursor contacts = mContext.getContentResolver().query(
            Contacts.CONTENT_URI, PROJECTION, null, null, null);
    if (contacts != null) {
        try {
            for (boolean hasRow = contacts.moveToFirst(); hasRow;
                    hasRow = contacts.moveToNext()) {
                final String contactName = contacts.getString(INDEX_NAME);
                if (!isValidName(contactName) || isNameInDictionary(contactName)) {
                    continue;
                }
                if (DEBUG) {
                    Log.d(TAG, "Contact name missing: " + contactName + " (runtime = "
                            + (SystemClock.uptimeMillis() - startTime) + " ms)");
                }
                return true;
            }
        } finally {
            contacts.close();
        }
    }
    if (DEBUG) {
        Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
                + " ms)");
    }
    return false;
}
/**
 * Returns true if the name is non-null and contains no '@' character.
 */
private static boolean isValidName(String name) {
    return name != null && name.indexOf('@') == -1;
}
/**
 * Checks if the words in a name are in the current binary dictionary.
 *
 * Only words longer than one code point and shorter than MAX_WORD_LENGTH are checked. A word
 * that follows an earlier checked word is looked up as a bigram with that word when
 * mUseFirstLastBigrams is set; every other word is looked up on its own.
 *
 * @param name the contact name to tokenize and look up
 * @return false as soon as one lookup fails, true otherwise
 */
private boolean isNameInDictionary(String name) {
    final int len = name.codePointCount(0, name.length());
    String prevWord = null;
    for (int i = 0; i < len; i++) {
        if (!Character.isLetter(name.codePointAt(i))) {
            continue;
        }
        final int end = getWordEndPosition(name, len, i);
        final String word = name.substring(i, end);
        // Resume after this word; the loop increment moves i to end.
        i = end - 1;
        final int wordLen = word.codePointCount(0, word.length());
        if (wordLen >= MAX_WORD_LENGTH || wordLen <= 1) {
            continue;
        }
        final boolean checkAsBigram = !TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams;
        final boolean found = checkAsBigram
                ? super.isValidBigramLocked(prevWord, word)
                : super.isValidWordLocked(word);
        if (!found) {
            return false;
        }
        prevWord = word;
    }
    return true;
}
} }

View file

@ -95,6 +95,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
*/ */
protected abstract void loadDictionaryAsync(); protected abstract void loadDictionaryAsync();
/**
 * Indicates that the source dictionary content has changed and a rebuild of the binary file is
 * required. If it returns false, the next reload will only read the current binary dictionary
 * from file. Note that the shared binary dictionary is locked when this is called.
 *
 * @return true if the binary dictionary must be rebuilt from the source content, false if the
 *         existing binary file can be reused
 */
protected abstract boolean hasContentChanged();
/** /**
* Gets the shared dictionary controller for the given filename. * Gets the shared dictionary controller for the given filename.
*/ */
@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* the native side. * the native side.
*/ */
public void clearFusionDictionary() { public void clearFusionDictionary() {
mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions( mFusionDictionary = new FusionDictionary(new Node(),
new HashMap<String, String>(), false, false)); new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
false));
} }
/** /**
@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
protected boolean isValidWordInner(final CharSequence word) { protected boolean isValidWordInner(final CharSequence word) {
if (mLocalDictionaryController.tryLock()) { if (mLocalDictionaryController.tryLock()) {
try { try {
if (mBinaryDictionary != null) { return isValidWordLocked(word);
return mBinaryDictionary.isValidWord(word);
}
} finally { } finally {
mLocalDictionaryController.unlock(); mLocalDictionaryController.unlock();
} }
@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
return false; return false;
} }
/**
 * Looks the word up in mBinaryDictionary without acquiring any lock itself.
 *
 * @return false if no binary dictionary is loaded, otherwise the lookup result
 */
protected boolean isValidWordLocked(final CharSequence word) {
    return mBinaryDictionary != null && mBinaryDictionary.isValidWord(word);
}
/**
 * Looks the (word1, word2) pair up in mBinaryDictionary. This method does not touch
 * mLocalDictionaryController itself.
 *
 * @return false if no binary dictionary is loaded, otherwise the bigram lookup result
 */
protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) {
    return mBinaryDictionary != null && mBinaryDictionary.isValidBigram(word1, word2);
}
/**
 * Performs the bigram lookup while holding the local dictionary lock.
 *
 * @return false if the lock cannot be acquired immediately, otherwise the result of
 *         isValidBigramLocked
 */
protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) {
    if (!mLocalDictionaryController.tryLock()) {
        return false;
    }
    try {
        return isValidBigramLocked(word1, word2);
    } finally {
        mLocalDictionaryController.unlock();
    }
}
/**
 * Looks the (word1, word2) pair up in mBinaryDictionary without acquiring any lock itself.
 *
 * @return false if no binary dictionary is loaded, otherwise the bigram lookup result
 */
protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) {
    return mBinaryDictionary != null && mBinaryDictionary.isValidBigram(word1, word2);
}
/** /**
* Load the current binary dictionary from internal storage in a background thread. If no binary * Load the current binary dictionary from internal storage in a background thread. If no binary
* dictionary exists, this method will generate one. * dictionary exists, this method will generate one.
@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
} }
/** /**
* Sets whether or not the dictionary is out of date and requires a reload. * Marks that the dictionary is out of date and requires a reload.
*
* @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild
* of the binary file is required. If not true, the next reload process will only read
* the current binary dictionary from file.
*/ */
protected void setRequiresReload(final boolean reload) { protected void setRequiresReload(final boolean requiresRebuild) {
final long time = reload ? SystemClock.uptimeMillis() : 0; final long time = SystemClock.uptimeMillis();
mSharedDictionaryController.mLastUpdateRequestTime = time;
mLocalDictionaryController.mLastUpdateRequestTime = time; mLocalDictionaryController.mLastUpdateRequestTime = time;
mSharedDictionaryController.mLastUpdateRequestTime = time;
if (DEBUG) { if (DEBUG) {
Log.d(TAG, "Reload request: request=" + time + " update=" Log.d(TAG, "Reload request: request=" + time + " update="
+ mSharedDictionaryController.mLastUpdateTime); + mSharedDictionaryController.mLastUpdateTime);
@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) { if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
// If the shared dictionary file does not exist or is out of date, the first // If the shared dictionary file does not exist or is out of date, the first
// instance that acquires the lock will generate a new one. // instance that acquires the lock will generate a new one.
if (hasContentChanged()) {
// If the source content has changed, rebuild the binary dictionary.
mSharedDictionaryController.mLastUpdateTime = time; mSharedDictionaryController.mLastUpdateTime = time;
mLocalDictionaryController.mLastUpdateTime = time;
generateBinaryDictionary(); generateBinaryDictionary();
loadBinaryDictionary(); loadBinaryDictionary();
} else if (mLocalDictionaryController.isOutOfDate()) { } else {
// Otherwise, if only the local dictionary for this instance is out of date, load // If not, the reload request was unnecessary so revert LastUpdateRequestTime
// the shared dictionary from file. // to LastUpdateTime.
mLocalDictionaryController.mLastUpdateTime = time; mSharedDictionaryController.mLastUpdateRequestTime =
mSharedDictionaryController.mLastUpdateTime;
}
} else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime
< mSharedDictionaryController.mLastUpdateTime) {
// Otherwise, if the local dictionary is older than the shared dictionary, load the
// shared dictionary.
loadBinaryDictionary(); loadBinaryDictionary();
} }
mLocalDictionaryController.mLastUpdateTime = time;
} finally { } finally {
mSharedDictionaryController.unlock(); mSharedDictionaryController.unlock();
} }
} }
// TODO: cache the file's existence so that we avoid doing a disk access each time.
private boolean dictionaryFileExists() { private boolean dictionaryFileExists() {
final File file = new File(mContext.getFilesDir(), mFilename); final File file = new File(mContext.getFilesDir(), mFilename);
return file.exists(); return file.exists();

View file

@ -182,6 +182,20 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
return result; return result;
} }
// JNI entry point for BinaryDictionary.isValidBigramNative.
// Returns false if the dictionary handle is null or if either word array cannot be accessed;
// otherwise returns the result of Dictionary::isValidBigram for the two code point arrays.
static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
        jintArray wordArray1, jintArray wordArray2) {
    Dictionary *dictionary = (Dictionary*)dict;
    if (!dictionary) return (jboolean) false;
    jint *word1 = env->GetIntArrayElements(wordArray1, 0);
    // Bail out before touching the dictionary or releasing anything if the elements could not
    // be obtained; a NULL element pointer must not be handed to ReleaseIntArrayElements.
    if (!word1) return (jboolean) false;
    jint *word2 = env->GetIntArrayElements(wordArray2, 0);
    if (!word2) {
        env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
        return (jboolean) false;
    }
    jsize length1 = env->GetArrayLength(wordArray1);
    jsize length2 = env->GetArrayLength(wordArray2);
    jboolean result = dictionary->isValidBigram(word1, length1, word2, length2);
    // The arrays are only read, so JNI_ABORT releases them without copying anything back.
    env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT);
    env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
    return result;
}
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) { jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
jchar *beforeChars = env->GetCharArrayElements(before, 0); jchar *beforeChars = env->GetCharArrayElements(before, 0);
@ -239,6 +253,7 @@ static JNINativeMethod sMethods[] = {
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
(void*)latinime_BinaryDictionary_getSuggestions}, (void*)latinime_BinaryDictionary_getSuggestions},
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
{"calcNormalizedScoreNative", "([CI[CII)D", {"calcNormalizedScoreNative", "([CI[CII)D",
(void*)latinime_BinaryDictionary_calcNormalizedScore}, (void*)latinime_BinaryDictionary_calcNormalizedScore},

View file

@ -128,7 +128,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
++bigramCount; ++bigramCount;
} }
} }
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
return bigramCount; return bigramCount;
} }
@ -189,5 +189,25 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
return false; return false;
} }
// Returns true when the bigram list found for word1 contains an entry whose address equals the
// terminal position looked up for word2. Returns false otherwise, including when either
// lookup fails.
bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
int length2) {
const uint8_t* const root = DICT;
int pos = getBigramListPositionForWord(word1, length1);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (0 == pos) return false;
int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
if (NOT_VALID_WORD == nextWordPos) return false;
int bigramFlags;
// Walk the bigram entries of word1. Both helpers receive &pos (presumably advancing it past
// the data they read — confirm in BinaryFormat); the loop continues while the entry's flags
// have FLAG_ATTRIBUTE_HAS_NEXT set.
do {
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
&pos);
// A match means word2 is listed as a bigram of word1.
if (bigramPos == nextWordPos) {
return true;
}
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
return false;
}
// TODO: Move functions related to bigram to here // TODO: Move functions related to bigram to here
} // namespace latinime } // namespace latinime

View file

@ -33,6 +33,7 @@ class BigramDictionary {
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength); int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
std::map<int, int> *map, uint8_t *filter); std::map<int, int> *map, uint8_t *filter);
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
~BigramDictionary(); ~BigramDictionary();
private: private:
bool addWordBigram(unsigned short *word, int length, int frequency); bool addWordBigram(unsigned short *word, int length, int frequency);

View file

@ -58,4 +58,9 @@ bool Dictionary::isValidWord(const int32_t *word, int length) {
return mUnigramDictionary->isValidWord(word, length); return mUnigramDictionary->isValidWord(word, length);
} }
// Forwards the bigram validity check for (word1, word2) to mBigramDictionary and returns its
// result unchanged.
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
int length2) {
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
}
} // namespace latinime } // namespace latinime

View file

@ -53,6 +53,7 @@ class Dictionary {
} }
bool isValidWord(const int32_t *word, int length); bool isValidWord(const int32_t *word, int length);
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
void *getDict() { return (void *)mDict; } void *getDict() { return (void *)mDict; }
int getDictSize() { return mDictSize; } int getDictSize() { return mDictSize; }
int getMmapFd() { return mMmapFd; } int getMmapFd() { return mMmapFd; }