Merge "Contacts dictionary rebuilds only when contact names have changed." into jb-dev
commit
5282f00ff8
|
@ -17,6 +17,7 @@
|
||||||
package com.android.inputmethod.latin;
|
package com.android.inputmethod.latin;
|
||||||
|
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
|
import android.text.TextUtils;
|
||||||
|
|
||||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||||
|
|
||||||
|
@ -84,6 +85,7 @@ public class BinaryDictionary extends Dictionary {
|
||||||
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
|
||||||
private native void closeNative(long dict);
|
private native void closeNative(long dict);
|
||||||
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
|
private native boolean isValidWordNative(long dict, int[] word, int wordLength);
|
||||||
|
private native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
|
||||||
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
|
||||||
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams,
|
||||||
boolean useFullEditDistance, char[] outputChars, int[] scores);
|
boolean useFullEditDistance, char[] outputChars, int[] scores);
|
||||||
|
@ -204,6 +206,15 @@ public class BinaryDictionary extends Dictionary {
|
||||||
return isValidWordNative(mNativeDict, chars, chars.length);
|
return isValidWordNative(mNativeDict, chars, chars.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
|
||||||
|
// calls when checking for changes in an entire dictionary.
|
||||||
|
public boolean isValidBigram(CharSequence word1, CharSequence word2) {
|
||||||
|
if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false;
|
||||||
|
int[] chars1 = StringUtils.toCodePointArray(word1.toString());
|
||||||
|
int[] chars2 = StringUtils.toCodePointArray(word2.toString());
|
||||||
|
return isValidBigramNative(mNativeDict, chars1, chars2);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized void close() {
|
public synchronized void close() {
|
||||||
closeInternal();
|
closeInternal();
|
||||||
|
|
|
@ -18,6 +18,7 @@ import android.content.ContentResolver;
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
import android.database.ContentObserver;
|
import android.database.ContentObserver;
|
||||||
import android.database.Cursor;
|
import android.database.Cursor;
|
||||||
|
import android.os.SystemClock;
|
||||||
import android.provider.BaseColumns;
|
import android.provider.BaseColumns;
|
||||||
import android.provider.ContactsContract.Contacts;
|
import android.provider.ContactsContract.Contacts;
|
||||||
import android.text.TextUtils;
|
import android.text.TextUtils;
|
||||||
|
@ -30,18 +31,27 @@ import java.util.Locale;
|
||||||
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||||
|
|
||||||
private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
|
private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME,};
|
||||||
|
private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
|
||||||
|
|
||||||
private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
|
private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
|
||||||
private static final String NAME = "contacts";
|
private static final String NAME = "contacts";
|
||||||
|
|
||||||
|
private static boolean DEBUG = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Frequency for contacts information into the dictionary
|
* Frequency for contacts information into the dictionary
|
||||||
*/
|
*/
|
||||||
private static final int FREQUENCY_FOR_CONTACTS = 40;
|
private static final int FREQUENCY_FOR_CONTACTS = 40;
|
||||||
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
|
private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
|
||||||
|
|
||||||
|
/** The maximum number of contacts that this dictionary supports. */
|
||||||
|
private static final int MAX_CONTACT_COUNT = 10000;
|
||||||
|
|
||||||
private static final int INDEX_NAME = 1;
|
private static final int INDEX_NAME = 1;
|
||||||
|
|
||||||
|
/** The number of contacts in the most recent dictionary rebuild. */
|
||||||
|
static private int sContactCountAtLastRebuild = 0;
|
||||||
|
|
||||||
private ContentObserver mObserver;
|
private ContentObserver mObserver;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -98,6 +108,7 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||||
if (cursor != null) {
|
if (cursor != null) {
|
||||||
try {
|
try {
|
||||||
if (cursor.moveToFirst()) {
|
if (cursor.moveToFirst()) {
|
||||||
|
sContactCountAtLastRebuild = getContactCount();
|
||||||
addWords(cursor);
|
addWords(cursor);
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -125,15 +136,28 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||||
|
|
||||||
private void addWords(Cursor cursor) {
|
private void addWords(Cursor cursor) {
|
||||||
clearFusionDictionary();
|
clearFusionDictionary();
|
||||||
while (!cursor.isAfterLast()) {
|
int count = 0;
|
||||||
|
while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
|
||||||
String name = cursor.getString(INDEX_NAME);
|
String name = cursor.getString(INDEX_NAME);
|
||||||
if (name != null && -1 == name.indexOf('@')) {
|
if (isValidName(name)) {
|
||||||
addName(name);
|
addName(name);
|
||||||
|
++count;
|
||||||
}
|
}
|
||||||
cursor.moveToNext();
|
cursor.moveToNext();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int getContactCount() {
|
||||||
|
// TODO: consider switching to a rawQuery("select count(*)...") on the database if
|
||||||
|
// performance is a bottleneck.
|
||||||
|
final Cursor cursor = mContext.getContentResolver().query(
|
||||||
|
Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
|
||||||
|
if (cursor != null) {
|
||||||
|
return cursor.getCount();
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
|
* Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
|
||||||
* bigrams depending on locale.
|
* bigrams depending on locale.
|
||||||
|
@ -144,16 +168,9 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||||
// TODO: Better tokenization for non-Latin writing systems
|
// TODO: Better tokenization for non-Latin writing systems
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
if (Character.isLetter(name.codePointAt(i))) {
|
if (Character.isLetter(name.codePointAt(i))) {
|
||||||
int j;
|
int end = getWordEndPosition(name, len, i);
|
||||||
for (j = i + 1; j < len; j++) {
|
String word = name.substring(i, end);
|
||||||
final int codePoint = name.codePointAt(j);
|
i = end - 1;
|
||||||
if (!(codePoint == Keyboard.CODE_DASH || codePoint == Keyboard.CODE_SINGLE_QUOTE
|
|
||||||
|| Character.isLetter(codePoint))) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
String word = name.substring(i, j);
|
|
||||||
i = j - 1;
|
|
||||||
// Don't add single letter words, possibly confuses
|
// Don't add single letter words, possibly confuses
|
||||||
// capitalization of i.
|
// capitalization of i.
|
||||||
final int wordLen = word.codePointCount(0, word.length());
|
final int wordLen = word.codePointCount(0, word.length());
|
||||||
|
@ -169,4 +186,100 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the index of the last letter in the word, starting from position startIndex.
|
||||||
|
*/
|
||||||
|
private static int getWordEndPosition(String string, int len, int startIndex) {
|
||||||
|
int end;
|
||||||
|
int cp = 0;
|
||||||
|
for (end = startIndex + 1; end < len; end += Character.charCount(cp)) {
|
||||||
|
cp = string.codePointAt(end);
|
||||||
|
if (!(cp == Keyboard.CODE_DASH || cp == Keyboard.CODE_SINGLE_QUOTE
|
||||||
|
|| Character.isLetter(cp))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return end;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean hasContentChanged() {
|
||||||
|
final long startTime = SystemClock.uptimeMillis();
|
||||||
|
final int contactCount = getContactCount();
|
||||||
|
if (contactCount > MAX_CONTACT_COUNT) {
|
||||||
|
// If there are too many contacts then return false. In this rare case it is impossible
|
||||||
|
// to include all of them anyways and the cost of rebuilding the dictionary is too high.
|
||||||
|
// TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (contactCount != sContactCountAtLastRebuild) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// Check all contacts since it's not possible to find out which names have changed.
|
||||||
|
// This is needed because it's possible to receive extraneous onChange events even when no
|
||||||
|
// name has changed.
|
||||||
|
Cursor cursor = mContext.getContentResolver().query(
|
||||||
|
Contacts.CONTENT_URI, PROJECTION, null, null, null);
|
||||||
|
if (cursor != null) {
|
||||||
|
try {
|
||||||
|
if (cursor.moveToFirst()) {
|
||||||
|
while (!cursor.isAfterLast()) {
|
||||||
|
String name = cursor.getString(INDEX_NAME);
|
||||||
|
if (isValidName(name) && !isNameInDictionary(name)) {
|
||||||
|
if (DEBUG) {
|
||||||
|
Log.d(TAG, "Contact name missing: " + name + " (runtime = "
|
||||||
|
+ (SystemClock.uptimeMillis() - startTime) + " ms)");
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
cursor.moveToNext();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
cursor.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (DEBUG) {
|
||||||
|
Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
|
||||||
|
+ " ms)");
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidName(String name) {
|
||||||
|
if (name != null && -1 == name.indexOf('@')) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the words in a name are in the current binary dictionary.
|
||||||
|
*/
|
||||||
|
private boolean isNameInDictionary(String name) {
|
||||||
|
int len = name.codePointCount(0, name.length());
|
||||||
|
String prevWord = null;
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
if (Character.isLetter(name.codePointAt(i))) {
|
||||||
|
int end = getWordEndPosition(name, len, i);
|
||||||
|
String word = name.substring(i, end);
|
||||||
|
i = end - 1;
|
||||||
|
final int wordLen = word.codePointCount(0, word.length());
|
||||||
|
if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
|
||||||
|
if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
|
||||||
|
if (!super.isValidBigramLocked(prevWord, word)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!super.isValidWordLocked(word)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prevWord = word;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,6 +95,13 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
*/
|
*/
|
||||||
protected abstract void loadDictionaryAsync();
|
protected abstract void loadDictionaryAsync();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates that the source dictionary content has changed and a rebuild of the binary file is
|
||||||
|
* required. If it returns false, the next reload will only read the current binary dictionary
|
||||||
|
* from file. Note that the shared binary dictionary is locked when this is called.
|
||||||
|
*/
|
||||||
|
protected abstract boolean hasContentChanged();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the shared dictionary controller for the given filename.
|
* Gets the shared dictionary controller for the given filename.
|
||||||
*/
|
*/
|
||||||
|
@ -148,8 +155,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
* the native side.
|
* the native side.
|
||||||
*/
|
*/
|
||||||
public void clearFusionDictionary() {
|
public void clearFusionDictionary() {
|
||||||
mFusionDictionary = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions(
|
mFusionDictionary = new FusionDictionary(new Node(),
|
||||||
new HashMap<String, String>(), false, false));
|
new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
|
||||||
|
false));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -224,9 +232,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
protected boolean isValidWordInner(final CharSequence word) {
|
protected boolean isValidWordInner(final CharSequence word) {
|
||||||
if (mLocalDictionaryController.tryLock()) {
|
if (mLocalDictionaryController.tryLock()) {
|
||||||
try {
|
try {
|
||||||
if (mBinaryDictionary != null) {
|
return isValidWordLocked(word);
|
||||||
return mBinaryDictionary.isValidWord(word);
|
|
||||||
}
|
|
||||||
} finally {
|
} finally {
|
||||||
mLocalDictionaryController.unlock();
|
mLocalDictionaryController.unlock();
|
||||||
}
|
}
|
||||||
|
@ -234,6 +240,32 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected boolean isValidWordLocked(final CharSequence word) {
|
||||||
|
if (mBinaryDictionary == null) return false;
|
||||||
|
return mBinaryDictionary.isValidWord(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean isValidBigram(final CharSequence word1, final CharSequence word2) {
|
||||||
|
if (mBinaryDictionary == null) return false;
|
||||||
|
return mBinaryDictionary.isValidBigram(word1, word2);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean isValidBigramInner(final CharSequence word1, final CharSequence word2) {
|
||||||
|
if (mLocalDictionaryController.tryLock()) {
|
||||||
|
try {
|
||||||
|
return isValidBigramLocked(word1, word2);
|
||||||
|
} finally {
|
||||||
|
mLocalDictionaryController.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean isValidBigramLocked(final CharSequence word1, final CharSequence word2) {
|
||||||
|
if (mBinaryDictionary == null) return false;
|
||||||
|
return mBinaryDictionary.isValidBigram(word1, word2);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load the current binary dictionary from internal storage in a background thread. If no binary
|
* Load the current binary dictionary from internal storage in a background thread. If no binary
|
||||||
* dictionary exists, this method will generate one.
|
* dictionary exists, this method will generate one.
|
||||||
|
@ -315,12 +347,16 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets whether or not the dictionary is out of date and requires a reload.
|
* Marks that the dictionary is out of date and requires a reload.
|
||||||
|
*
|
||||||
|
* @param requiresRebuild Indicates that the source dictionary content has changed and a rebuild
|
||||||
|
* of the binary file is required. If not true, the next reload process will only read
|
||||||
|
* the current binary dictionary from file.
|
||||||
*/
|
*/
|
||||||
protected void setRequiresReload(final boolean reload) {
|
protected void setRequiresReload(final boolean requiresRebuild) {
|
||||||
final long time = reload ? SystemClock.uptimeMillis() : 0;
|
final long time = SystemClock.uptimeMillis();
|
||||||
mSharedDictionaryController.mLastUpdateRequestTime = time;
|
|
||||||
mLocalDictionaryController.mLastUpdateRequestTime = time;
|
mLocalDictionaryController.mLastUpdateRequestTime = time;
|
||||||
|
mSharedDictionaryController.mLastUpdateRequestTime = time;
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
Log.d(TAG, "Reload request: request=" + time + " update="
|
Log.d(TAG, "Reload request: request=" + time + " update="
|
||||||
+ mSharedDictionaryController.mLastUpdateTime);
|
+ mSharedDictionaryController.mLastUpdateTime);
|
||||||
|
@ -351,21 +387,30 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
|
if (mSharedDictionaryController.isOutOfDate() || !dictionaryFileExists()) {
|
||||||
// If the shared dictionary file does not exist or is out of date, the first
|
// If the shared dictionary file does not exist or is out of date, the first
|
||||||
// instance that acquires the lock will generate a new one.
|
// instance that acquires the lock will generate a new one.
|
||||||
mSharedDictionaryController.mLastUpdateTime = time;
|
if (hasContentChanged()) {
|
||||||
mLocalDictionaryController.mLastUpdateTime = time;
|
// If the source content has changed, rebuild the binary dictionary.
|
||||||
generateBinaryDictionary();
|
mSharedDictionaryController.mLastUpdateTime = time;
|
||||||
loadBinaryDictionary();
|
generateBinaryDictionary();
|
||||||
} else if (mLocalDictionaryController.isOutOfDate()) {
|
loadBinaryDictionary();
|
||||||
// Otherwise, if only the local dictionary for this instance is out of date, load
|
} else {
|
||||||
// the shared dictionary from file.
|
// If not, the reload request was unnecessary so revert LastUpdateRequestTime
|
||||||
mLocalDictionaryController.mLastUpdateTime = time;
|
// to LastUpdateTime.
|
||||||
|
mSharedDictionaryController.mLastUpdateRequestTime =
|
||||||
|
mSharedDictionaryController.mLastUpdateTime;
|
||||||
|
}
|
||||||
|
} else if (mBinaryDictionary == null || mLocalDictionaryController.mLastUpdateTime
|
||||||
|
< mSharedDictionaryController.mLastUpdateTime) {
|
||||||
|
// Otherwise, if the local dictionary is older than the shared dictionary, load the
|
||||||
|
// shared dictionary.
|
||||||
loadBinaryDictionary();
|
loadBinaryDictionary();
|
||||||
}
|
}
|
||||||
|
mLocalDictionaryController.mLastUpdateTime = time;
|
||||||
} finally {
|
} finally {
|
||||||
mSharedDictionaryController.unlock();
|
mSharedDictionaryController.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: cache the file's existence so that we avoid doing a disk access each time.
|
||||||
private boolean dictionaryFileExists() {
|
private boolean dictionaryFileExists() {
|
||||||
final File file = new File(mContext.getFilesDir(), mFilename);
|
final File file = new File(mContext.getFilesDir(), mFilename);
|
||||||
return file.exists();
|
return file.exists();
|
||||||
|
|
|
@ -182,6 +182,20 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject object, jlong dict,
|
||||||
|
jintArray wordArray1, jintArray wordArray2) {
|
||||||
|
Dictionary *dictionary = (Dictionary*)dict;
|
||||||
|
if (!dictionary) return (jboolean) false;
|
||||||
|
jint *word1 = env->GetIntArrayElements(wordArray1, 0);
|
||||||
|
jint *word2 = env->GetIntArrayElements(wordArray2, 0);
|
||||||
|
jsize length1 = word1 ? env->GetArrayLength(wordArray1) : 0;
|
||||||
|
jsize length2 = word2 ? env->GetArrayLength(wordArray2) : 0;
|
||||||
|
jboolean result = dictionary->isValidBigram(word1, length1, word2, length2);
|
||||||
|
env->ReleaseIntArrayElements(wordArray2, word2, JNI_ABORT);
|
||||||
|
env->ReleaseIntArrayElements(wordArray1, word1, JNI_ABORT);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object,
|
||||||
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
|
jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) {
|
||||||
jchar *beforeChars = env->GetCharArrayElements(before, 0);
|
jchar *beforeChars = env->GetCharArrayElements(before, 0);
|
||||||
|
@ -239,6 +253,7 @@ static JNINativeMethod sMethods[] = {
|
||||||
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
{"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I",
|
||||||
(void*)latinime_BinaryDictionary_getSuggestions},
|
(void*)latinime_BinaryDictionary_getSuggestions},
|
||||||
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
{"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord},
|
||||||
|
{"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram},
|
||||||
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
{"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams},
|
||||||
{"calcNormalizedScoreNative", "([CI[CII)D",
|
{"calcNormalizedScoreNative", "([CI[CII)D",
|
||||||
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
(void*)latinime_BinaryDictionary_calcNormalizedScore},
|
||||||
|
|
|
@ -128,7 +128,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
||||||
++bigramCount;
|
++bigramCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||||
return bigramCount;
|
return bigramCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,5 +189,25 @@ bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool BigramDictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
||||||
|
int length2) {
|
||||||
|
const uint8_t* const root = DICT;
|
||||||
|
int pos = getBigramListPositionForWord(word1, length1);
|
||||||
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
|
if (0 == pos) return false;
|
||||||
|
int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2);
|
||||||
|
if (NOT_VALID_WORD == nextWordPos) return false;
|
||||||
|
int bigramFlags;
|
||||||
|
do {
|
||||||
|
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
|
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||||
|
&pos);
|
||||||
|
if (bigramPos == nextWordPos) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} while (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Move functions related to bigram to here
|
// TODO: Move functions related to bigram to here
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -33,6 +33,7 @@ class BigramDictionary {
|
||||||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
||||||
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
|
void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength,
|
||||||
std::map<int, int> *map, uint8_t *filter);
|
std::map<int, int> *map, uint8_t *filter);
|
||||||
|
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
private:
|
private:
|
||||||
bool addWordBigram(unsigned short *word, int length, int frequency);
|
bool addWordBigram(unsigned short *word, int length, int frequency);
|
||||||
|
|
|
@ -58,4 +58,9 @@ bool Dictionary::isValidWord(const int32_t *word, int length) {
|
||||||
return mUnigramDictionary->isValidWord(word, length);
|
return mUnigramDictionary->isValidWord(word, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2,
|
||||||
|
int length2) {
|
||||||
|
return mBigramDictionary->isValidBigram(word1, length1, word2, length2);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -53,6 +53,7 @@ class Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidWord(const int32_t *word, int length);
|
bool isValidWord(const int32_t *word, int length);
|
||||||
|
bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2);
|
||||||
void *getDict() { return (void *)mDict; }
|
void *getDict() { return (void *)mDict; }
|
||||||
int getDictSize() { return mDictSize; }
|
int getDictSize() { return mDictSize; }
|
||||||
int getMmapFd() { return mMmapFd; }
|
int getMmapFd() { return mMmapFd; }
|
||||||
|
|
Loading…
Reference in New Issue