Merge "Contacts dictionary rebuilds only when contact names have changed." into jb-dev
This commit is contained in:
commit
5282f00ff8
8 changed files with 241 additions and 30 deletions
|
@ -17,6 +17,7 @@
|
|||
package com.android.inputmethod.latin;
|
||||
|
||||
import android.content.Context;
|
||||
import android.text.TextUtils;
|
||||
|
||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||
|
||||
|
@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary {
|
|||
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
||||
private native void closeNative(long dict);
|
||||
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
|
||||
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
|
||||
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
||||
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
||||
boolean useFullEditDistance, char[] outputChars, int[] scores);
|
||||
|
@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary {
|
|||
return isValidWordNative(mNativeDict, chars, chars.length);
|
||||
}
|
||||
|
||||
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
||||
// calls when checking for changes in an entire dictionary.
|
||||
public boolean isValidBigram(CharSequence word1, CharSequence word2) {
|
||||
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
|
||||
int[] chars1 = StringUtils.toCodePointArray(word1.toString());
|
||||
int[] chars2 = StringUtils.toCodePointArray(word2.toString());
|
||||
return isValidBigramNative(mNativeDict, chars1, chars2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
closeInternal();
|
||||
|
|
|
@ -18,6 +18,7 @@ import android.content.ContentResolver;
|
|||
import android.content.Context;
|
||||
import android.database.ContentObserver;
|
||||
import android.database.Cursor;
|
||||
import android.os.SystemClock;
|
||||
import android.provider.BaseColumns;
|
||||
import android.provider.ContactsContract.Contacts;
|
||||
import android.text.TextUtils;
|
||||
|
@ -30,18 +31,27 @@ import java.util.Locale;
|
|||
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||
|
||||
private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
|
||||
private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
|
||||
|
||||
private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
|
||||
private static final String NAME = "contacts";
|
||||
|
||||
private static boolean DEBUG = false;
|
||||
|
||||
/**
|
||||
* Frequency assigned to contact information added to the dictionary
|
||||
*/
|
||||
private static final int FREQUENCY_FOR_CONTACTS = 40;
|
||||
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
|
||||
|
||||
/** The maximum number of contacts that this dictionary supports. */
|
||||
private static final int MAX_CONTACT_COUNT = 10000;
|
||||
|
||||
private static final int INDEX_NAME = 1;
|
||||
|
||||
/** The number of contacts in the most recent dictionary rebuild. */
|
||||
static private int sContactCountAtLastRebuild = 0;
|
||||
|
||||
private ContentObserver mObserver;
|
||||
|
||||
/**
|
||||
|
@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
if (cursor != null) {
|
||||
try {
|
||||
if (cursor.moveToFirst()) {
|
||||
sContactCountAtLastRebuild = getContactCount();
|
||||
addWords(cursor);
|
||||
}
|
||||
} finally {
|
||||
|
@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
|
||||
private void addWords(Cursor cursor) {
|
||||
clearFusionDictionary();
|
||||
while (!cursor.isAfterLast()) {
|
||||
int count = 0;
|
||||
while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
|
||||
String name = cursor.getString(INDEX_NAME);
|
||||
if (name != null && -1 == name.indexOf('@')) {
|
||||
if (isValidName(name)) {
|
||||
addName(name);
|
||||
++count;
|
||||
}
|
||||
cursor.moveToNext();
|
||||
}
|
||||
}
|
||||
|
||||
private int getContactCount() {
|
||||
// TODO: consider switching to a rawQuery("select count(*)...") on the database if
|
||||
// performance is a bottleneck.
|
||||
final Cursor cursor = mContext.getContentResolver().query(
|
||||
Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
|
||||
if (cursor != null) {
|
||||
return cursor.getCount();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
|
||||
* bigrams depending on locale.
|
||||
|
@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
// TODO: Better tokenization for non-Latin writing systems
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (Character.isLetter(name.codePointAt(i))) {
|
||||
int j;
|
||||
for (j = i + 1; j < len; j++) {
|
||||
final int codePoint = name.codePointAt(j);
|
||||
if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE
|
||||
|| Character.isLetter(codePoint))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
String word = name.substring(i, j);
|
||||
i = j - 1;
|
||||
int end = getWordEndPosition(name, len, i);
|
||||
String word = name.substring(i, end);
|
||||
i = end - 1;
|
||||
// Don't add single letter words, possibly confuses
|
||||
// capitalization of i.
|
||||
final int wordLen = word.codePointCount(0, word.length());
|
||||
|
@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of the last letter in the word, starting from position startIndex.
|
||||
*/
|
||||
private static int getWordEndPosition(String string, int len, int startIndex) {
|
||||
int end;
|
||||
int cp = 0;
|
||||
for (end = startIndex + 1; end < len; end += Character.charCount(cp)) {
|
||||
cp = string.codePointAt(end);
|
||||
if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE
|
||||
|| Character.isLetter(cp))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean hasContentChanged() {
|
||||
final long startTime = SystemClock.uptimeMillis();
|
||||
final int contactCount = getContactCount();
|
||||
if (contactCount > MAX_CONTACT_COUNT) {
|
||||
// If there are too many contacts then return false. In this rare case it is impossible
|
||||
// to include all of them anyways and the cost of rebuilding the dictionary is too high.
|
||||
// TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
|
||||
return false;
|
||||
}
|
||||
if (contactCount != sContactCountAtLastRebuild) {
|
||||
return true;
|
||||
}
|
||||
// Check all contacts since it's not possible to find out which names have changed.
|
||||
// This is needed because it's possible to receive extraneous onChange events even when no
|
||||
// name has changed.
|
||||
Cursor cursor = mContext.getContentResolver().query(
|
||||
Contacts.CONTENT_URI, PROJECTION, null, null, null);
|
||||
if (cursor != null) {
|
||||
try {
|
||||
if (cursor.moveToFirst()) {
|
||||
while (!cursor.isAfterLast()) {
|
||||
String name = cursor.getString(INDEX_NAME);
|
||||
if (isValidName(name) && !isNameInDictionary(name)) {
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "Contact name missing: " + name + " (runtime = "
|
||||
+ (SystemClock.uptimeMillis() - startTime) + " ms)");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
cursor.moveToNext();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
cursor.close();
|
||||
}
|
||||
}
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
|
||||
+ " ms)");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static boolean isValidName(String name) {
|
||||
if (name != null && -1 == name.indexOf('@')) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the words in a name are in the current binary dictionary.
|
||||
*/
|
||||
private boolean isNameInDictionary(String name) {
|
||||
int len = name.codePointCount(0, name.length());
|
||||
String prevWord = null;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (Character.isLetter(name.codePointAt(i))) {
|
||||
int end = getWordEndPosition(name, len, i);
|
||||
String word = name.substring(i, end);
|
||||
i = end - 1;
|
||||
final int wordLen = word.codePointCount(0, word.length());
|
||||
if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
|
||||
if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
|
||||
if (!super.isValidBigramLocked(prevWord, word)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!super.isValidWordLocked(word)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
prevWord = word;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,6 +95,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
*/
|
||||
protected abstract void loadDictionaryAsync();
|
||||
|
||||
/**
|
||||
* Indicates that the source dictionary content has changed and a rebuild of the binary file is
|
||||
* required. If it returns false, the next reload will only read the current binary dictionary
|
||||
* from file. Note that the shared binary dictionary is locked when this is called.
|
||||
*/
|
||||
protected abstract boolean hasContentChanged();
|
||||
|
||||
/**
|
||||
* Gets the shared dictionary controller for the given filename.
|
||||
*/
|
||||
|
@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
* the native side.
|
||||
*/
|
||||
public void clearFusionDictionary() {
|
||||
mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions(
|
||||
new HashMap<String, String>(), false, false));
|
||||
mFusionDictionary = new FusionDictionary(new Node(),
|
||||
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||
false));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
protected boolean isValidWordInner(final CharSequence word) {
|
||||
if (mLocalDictionaryController.tryLock()) {
|
||||
try {
|
||||
if (mBinaryDictionary != null) {
|
||||
return mBinaryDictionary.isValidWord(word);
|
||||
}
|
||||
return isValidWordLocked(word);
|
||||
} finally {
|
||||
mLocalDictionaryController.unlock();
|
||||
}
|
||||
|
@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
return false;
|
||||
}
|
||||
|
||||
protected boolean isValidWordLocked(final CharSequence word) {
|
||||
if (mBinaryDictionary == null) return false;
|
||||
return mBinaryDictionary.isValidWord(word);
|
||||
}
|
||||
|
||||
protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) {
|
||||
if (mBinaryDictionary == null) return false;
|
||||
return mBinaryDictionary.isValidBigram(word1, word2);
|
||||
}
|
||||
|
||||
protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) {
|
||||
if (mLocalDictionaryController.tryLock()) {
|
||||
try {
|
||||
return isValidBigramLocked(word1, word2);
|
||||
} finally {
|
||||
mLocalDictionaryController.unlock();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) {
|
||||
if (mBinaryDictionary == null) return false;
|
||||
return mBinaryDictionary.isValidBigram(word1, word2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the current binary dictionary from internal storage in a background thread. If no binary
|
||||
* dictionary exists, this method will generate one.
|
||||
|
@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sets whether or not the dictionary is out of date and requires a reload.
|
||||
* Marks that the dictionary is out of date and requires a reload.
|
||||
*
|
||||
* @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild
|
||||
* of the binary file is required. If not true, the next reload process will only read
|
||||
* the current binary dictionary from file.
|
||||
*/
|
||||
protected void setRequiresReload(final boolean reload) {
|
||||
final long time = reload ? SystemClock.uptimeMillis() : 0;
|
||||
mSharedDictionaryController.mLastUpdateRequestTime = time;
|
||||
protected void setRequiresReload(final boolean requiresRebuild) {
|
||||
final long time = SystemClock.uptimeMillis();
|
||||
mLocalDictionaryController.mLastUpdateRequestTime = time;
|
||||
mSharedDictionaryController.mLastUpdateRequestTime = time;
|
||||
if (DEBUG) {
|
||||
Log.d(TAG, "Reload request: request=" + time + " update="
|
||||
+ mSharedDictionaryController.mLastUpdateTime);
|
||||
|
@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
|
||||
// If the shared dictionary file does not exist or is out of date, the first
|
||||
// instance that acquires the lock will generate a new one.
|
||||
mSharedDictionaryController.mLastUpdateTime = time;
|
||||
mLocalDictionaryController.mLastUpdateTime = time;
|
||||
generateBinaryDictionary();
|
||||
loadBinaryDictionary();
|
||||
} else if (mLocalDictionaryController.isOutOfDate()) {
|
||||
// Otherwise, if only the local dictionary for this instance is out of date, load
|
||||
// the shared dictionary from file.
|
||||
mLocalDictionaryController.mLastUpdateTime = time;
|
||||
if (hasContentChanged()) {
|
||||
// If the source content has changed, rebuild the binary dictionary.
|
||||
mSharedDictionaryController.mLastUpdateTime = time;
|
||||
generateBinaryDictionary();
|
||||
loadBinaryDictionary();
|
||||
} else {
|
||||
// If not, the reload request was unnecessary so revert LastUpdateRequestTime
|
||||
// to LastUpdateTime.
|
||||
mSharedDictionaryController.mLastUpdateRequestTime =
|
||||
mSharedDictionaryController.mLastUpdateTime;
|
||||
}
|
||||
} else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime
|
||||
< mSharedDictionaryController.mLastUpdateTime) {
|
||||
// Otherwise, if the local dictionary is older than the shared dictionary, load the
|
||||
// shared dictionary.
|
||||
loadBinaryDictionary();
|
||||
}
|
||||
mLocalDictionaryController.mLastUpdateTime = time;
|
||||
} finally {
|
||||
mSharedDictionaryController.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: cache the file's existence so that we avoid doing a disk access each time.
|
||||
private boolean dictionaryFileExists() {
|
||||
final File file = new File(mContext.getFilesDir(), mFilename);
|
||||
return file.exists();
|
||||
|
|
|
@ -182,6 +182,20 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
|
|||
return result;
|
||||
}
|
||||
|
||||
// JNI entry point for BinaryDictionary.isValidBigramNative: reports whether the dictionary
// behind `dict` holds wordArray2 as a bigram of wordArray1. Returns false when the dictionary
// handle is null or when either array's elements cannot be obtained.
static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
        jintArray wordArray1, jintArray wordArray2) {
    Dictionary *dictionary = (Dictionary*)dict;
    if (!dictionary) return (jboolean) false;
    jint *word1 = env->GetIntArrayElements(wordArray1, 0);
    // GetIntArrayElements returns NULL on failure; never hand NULL to the lookup or to Release.
    if (!word1) return (jboolean) false;
    jint *word2 = env->GetIntArrayElements(wordArray2, 0);
    if (!word2) {
        env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
        return (jboolean) false;
    }
    const jsize length1 = env->GetArrayLength(wordArray1);
    const jsize length2 = env->GetArrayLength(wordArray2);
    const jboolean result = dictionary->isValidBigram(word1, length1, word2, length2);
    // JNI_ABORT: the arrays were only read, so nothing needs to be copied back.
    env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT);
    env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
    return result;
}
|
||||
|
||||
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
||||
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
|
||||
jchar *beforeChars = env->GetCharArrayElements(before, 0);
|
||||
|
@ -239,6 +253,7 @@ static JNINativeMethod sMethods[] = {
|
|||
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
||||
(void*)latinime_BinaryDictionary_getSuggestions},
|
||||
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
||||
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
|
||||
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
||||
{"calcNormalizedScoreNative", "([CI[CII)D",
|
||||
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
||||
|
|
|
@ -128,7 +128,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
++bigramCount;
|
||||
}
|
||||
}
|
||||
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
return bigramCount;
|
||||
}
|
||||
|
||||
|
@ -189,5 +189,25 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
||||
int length2) {
|
||||
const uint8_t* const root = DICT;
|
||||
int pos = getBigramListPositionForWord(word1, length1);
|
||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||
if (0 == pos) return false;
|
||||
int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
|
||||
if (NOT_VALID_WORD == nextWordPos) return false;
|
||||
int bigramFlags;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||
&pos);
|
||||
if (bigramPos == nextWordPos) {
|
||||
return true;
|
||||
}
|
||||
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Move functions related to bigram to here
|
||||
} // namespace latinime
|
||||
|
|
|
@ -33,6 +33,7 @@ class BigramDictionary {
|
|||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
||||
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
|
||||
std::map<int, int> *map, uint8_t *filter);
|
||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
||||
~BigramDictionary();
|
||||
private:
|
||||
bool addWordBigram(unsigned short *word, int length, int frequency);
|
||||
|
|
|
@ -58,4 +58,9 @@ bool Dictionary::isValidWord(const int32_t *word, int length) {
|
|||
return mUnigramDictionary->isValidWord(word, length);
|
||||
}
|
||||
|
||||
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
||||
int length2) {
|
||||
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -53,6 +53,7 @@ class Dictionary {
|
|||
}
|
||||
|
||||
bool isValidWord(const int32_t *word, int length);
|
||||
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
||||
void *getDict() { return (void *)mDict; }
|
||||
int getDictSize() { return mDictSize; }
|
||||
int getMmapFd() { return mMmapFd; }
|
||||
|
|
Loading…
Reference in a new issue