Merge "Implement addMultipleDictionaryEntries as a jni method."

main
Keisuke Kuroyanagi 2013-11-26 07:32:23 +00:00 committed by Android (Google) Code Review
commit 581ca435cf
6 changed files with 175 additions and 65 deletions

View File

@ -137,6 +137,8 @@ public final class BinaryDictionary extends Dictionary {
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
int probability); int probability);
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
private static native int addMultipleDictionaryEntriesNative(long dict,
LanguageModelParam[] languageModelParams, int startIndex);
private static native int calculateProbabilityNative(long dict, int unigramProbability, private static native int calculateProbabilityNative(long dict, int unigramProbability,
int bigramProbability); int bigramProbability);
private static native String getPropertyNative(long dict, String query); private static native String getPropertyNative(long dict, String query);
@ -303,6 +305,46 @@ public final class BinaryDictionary extends Dictionary {
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
} }
/**
 * One entry of a bulk dictionary update: a word with its unigram probability and,
 * optionally, the word preceding it together with the bigram probability of the pair.
 *
 * The native implementation of addMultipleDictionaryEntriesNative reads these fields by
 * name, so field names and types must stay in sync with the JNI code.
 */
public static class LanguageModelParam {
    // Code points of the preceding word; null when this entry carries no bigram.
    public final int[] mWord0;
    // Code points of the word this entry adds.
    public final int[] mWord1;
    public final int mUnigramProbability;
    public final int mBigramProbability;

    /**
     * Creates a unigram-only entry: no preceding word and no bigram probability.
     */
    public LanguageModelParam(final String word, final int unigramProbability) {
        this.mWord1 = StringUtils.toCodePointArray(word);
        this.mWord0 = null;
        this.mBigramProbability = NOT_A_PROBABILITY;
        this.mUnigramProbability = unigramProbability;
    }

    /**
     * Creates an entry carrying both the unigram for word1 and the bigram (word0, word1).
     */
    public LanguageModelParam(final String word0, final String word1,
            final int unigramProbability, final int bigramProbability) {
        final int[] precedingCodePoints = StringUtils.toCodePointArray(word0);
        final int[] targetCodePoints = StringUtils.toCodePointArray(word1);
        this.mWord0 = precedingCodePoints;
        this.mWord1 = targetCodePoints;
        this.mBigramProbability = bigramProbability;
        this.mUnigramProbability = unigramProbability;
    }
}
/**
 * Adds all given entries to the native dictionary in batches.
 *
 * Each native call starts at the index of the first entry that has not been handled yet and
 * returns the index at which the next call should resume; it comes back early when the
 * dictionary needs a GC, which is then run here before continuing.
 *
 * Does nothing when the dictionary is not valid or when {@code languageModelParams} is null.
 *
 * @param languageModelParams the entries to add, processed in array order.
 */
public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
    if (!isValidDictionary() || languageModelParams == null) return;
    int processedParamCount = 0;
    while (processedParamCount < languageModelParams.length) {
        if (needsToRunGC(true /* mindsBlockByGC */)) {
            flushWithGC();
        }
        final int nextParamIndex = addMultipleDictionaryEntriesNative(mNativeDict,
                languageModelParams, processedParamCount);
        if (nextParamIndex <= processedParamCount) {
            // The native side made no progress (it returns 0 when it cannot process
            // anything). Stop instead of calling it again with the same start index forever.
            return;
        }
        processedParamCount = nextParamIndex;
    }
}
private void reopen() { private void reopen() {
close(); close();
final File dictFile = new File(mDictFilePath); final File dictFile = new File(mDictFilePath);

View File

@ -22,6 +22,7 @@ import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.utils.AsyncResultHolder; import com.android.inputmethod.latin.utils.AsyncResultHolder;
@ -326,7 +327,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
* Dynamically adds a word bigram in the dictionary. May overwrite an existing entry. * Dynamically adds a word bigram in the dictionary. May overwrite an existing entry.
*/ */
protected void addBigramDynamically(final String word0, final String word1, protected void addBigramDynamically(final String word0, final String word1,
final int frequency, final boolean isValid) { final int frequency) {
if (!mIsUpdatable) { if (!mIsUpdatable) {
Log.w(TAG, "addBigramDynamically is called for non-updatable dictionary: " Log.w(TAG, "addBigramDynamically is called for non-updatable dictionary: "
+ mFilename); + mFilename);
@ -363,22 +364,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
public void onFinished(); public void onFinished();
} }
/**
 * Immutable holder for one entry of a bulk dictionary update: a word, an optional preceding
 * word, a validity flag, and the unigram and bigram frequencies.
 */
public static class LanguageModelParam {
    public final String mWord0;
    public final String mWord1;
    public final boolean mIsValid;
    public final int mFrequency;
    public final int mBigramFrequency;

    /**
     * Stores the given values as-is; no validation or copying is performed.
     */
    public LanguageModelParam(final String word0, final String word1, final boolean isValid,
            final int frequency, final int bigramFrequency) {
        this.mWord0 = word0;
        this.mWord1 = word1;
        this.mFrequency = frequency;
        this.mBigramFrequency = bigramFrequency;
        this.mIsValid = isValid;
    }
}
/** /**
* Dynamically add multiple entries to the dictionary. * Dynamically add multiple entries to the dictionary.
*/ */
@ -395,21 +380,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
public void run() { public void run() {
final boolean locked = setProcessingLargeTaskIfNot(); final boolean locked = setProcessingLargeTaskIfNot();
try { try {
for (final LanguageModelParam languageModelParam : languageModelParams) { mBinaryDictionary.addMultipleDictionaryEntries(
if (languageModelParam.mWord1 == null) { languageModelParams.toArray(
continue; new LanguageModelParam[languageModelParams.size()]));
}
if (mBinaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
mBinaryDictionary.flushWithGC();
}
mBinaryDictionary.addUnigramWord(languageModelParam.mWord1,
languageModelParam.mFrequency);
if (languageModelParam.mWord0 != null
&& !languageModelParam.mWord0.equals(languageModelParam.mWord1)) {
mBinaryDictionary.addBigramWords(languageModelParam.mWord0,
languageModelParam.mWord1, languageModelParam.mBigramFrequency);
}
}
} finally { } finally {
if (callback != null) { if (callback != null) {
callback.onFinished(); callback.onFinished();

View File

@ -21,6 +21,7 @@ import android.content.SharedPreferences;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary;
@ -147,7 +148,7 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
return; return;
} }
if (null != word0) { if (null != word0) {
addBigramDynamically(word0, word1, frequency, isValid); addBigramDynamically(word0, word1, frequency);
} }
} }

View File

@ -18,6 +18,7 @@ package com.android.inputmethod.latin.personalization;
import android.content.Context; import android.content.Context;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.ExpandableBinaryDictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import java.lang.ref.WeakReference; import java.lang.ref.WeakReference;
@ -28,24 +29,6 @@ import java.util.ArrayList;
* dictionary. * dictionary.
*/ */
public abstract class PersonalizationDictionaryUpdateSession { public abstract class PersonalizationDictionaryUpdateSession {
/**
 * Parameter object describing a new unigram or bigram word to be added to the
 * personalization dictionary. All fields are set once by the constructor.
 */
public static class PersonalizationLanguageModelParam {
    public final String mWord0;
    public final String mWord1;
    public final boolean mIsValid;
    public final int mFrequency;

    /**
     * Stores the given values as-is; no validation or copying is performed.
     */
    public PersonalizationLanguageModelParam(String word0, String word1, boolean isValid,
            int frequency) {
        this.mWord0 = word0;
        this.mWord1 = word1;
        this.mFrequency = frequency;
        this.mIsValid = isValid;
    }
}
// TODO: Use a dynamic binary dictionary instead // TODO: Use a dynamic binary dictionary instead
public WeakReference<PersonalizationDictionary> mDictionary; public WeakReference<PersonalizationDictionary> mDictionary;
public WeakReference<DecayingExpandableBinaryDictionaryBase> mPredictionDictionary; public WeakReference<DecayingExpandableBinaryDictionaryBase> mPredictionDictionary;
@ -117,7 +100,7 @@ public abstract class PersonalizationDictionaryUpdateSession {
// TODO: Support multi locale. // TODO: Support multi locale.
public void addMultipleDictionaryEntriesToPersonalizationDictionary( public void addMultipleDictionaryEntriesToPersonalizationDictionary(
final ArrayList<ExpandableBinaryDictionary.LanguageModelParam> languageModelParams, final ArrayList<LanguageModelParam> languageModelParams,
final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) { final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary(); final DecayingExpandableBinaryDictionaryBase dictionary = getPredictionDictionary();
if (dictionary == null) { if (dictionary == null) {
@ -128,17 +111,4 @@ public abstract class PersonalizationDictionaryUpdateSession {
} }
dictionary.addMultipleDictionaryEntriesToDictionary(languageModelParams, callback); dictionary.addMultipleDictionaryEntriesToDictionary(languageModelParams, callback);
} }
/**
 * Bulk import: forwards every given param to the prediction dictionary, one
 * addToDictionary call per param. Does nothing when no prediction dictionary is available.
 */
// TODO: Support multi locale to add bigram
public void addBigramsToPersonalizationDictionary(
        final ArrayList<PersonalizationLanguageModelParam> lmParams) {
    final DecayingExpandableBinaryDictionaryBase predictionDictionary =
            getPredictionDictionary();
    if (predictionDictionary != null) {
        for (final PersonalizationLanguageModelParam param : lmParams) {
            final String previousWord = param.mWord0;
            final String word = param.mWord1;
            predictionDictionary.addToDictionary(previousWord, word, param.mIsValid);
        }
    }
}
} }

View File

@ -320,6 +320,60 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
word1Length); word1Length);
} }
// Adds the entries languageModelParams[startIndex..] to the dictionary. For each entry the
// unigram for mWord1 is added and, when mWord0 is non-null, the bigram (mWord0, mWord1) too.
//
// Returns the index at which the next call should resume, i.e. the number of params consumed
// so far counted from index 0: languageModelParamCount when everything has been processed,
// or (i + 1) when processing stopped after entry i because the dictionary needs to run GC.
// Returns 0 when nothing can be processed (null dictionary or array, or startIndex out of
// range).
//
// Null array elements and elements whose mWord1 is null are skipped instead of being
// dereferenced.
static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
        jlong dict, jobjectArray languageModelParams, jint startIndex) {
    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    if (!dictionary || !languageModelParams) {
        return 0;
    }
    const jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
    if (startIndex < 0 || startIndex >= languageModelParamCount) {
        // Also covers the empty-array case.
        return 0;
    }
    // Field IDs are resolved lazily from the first non-null element so that a null element
    // at the front of the array cannot break the lookup.
    bool hasFieldIds = false;
    jfieldID word0FieldId = 0;
    jfieldID word1FieldId = 0;
    jfieldID unigramProbabilityFieldId = 0;
    jfieldID bigramProbabilityFieldId = 0;

    for (int i = startIndex; i < languageModelParamCount; ++i) {
        jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
        if (!languageModelParam) {
            continue;
        }
        if (!hasFieldIds) {
            jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
            word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
            word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
            unigramProbabilityFieldId =
                    env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
            bigramProbabilityFieldId =
                    env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
            env->DeleteLocalRef(languageModelParamClass);
            hasFieldIds = true;
        }
        // word1 is the word this param describes; without it there is nothing to add.
        jintArray word1 = static_cast<jintArray>(
                env->GetObjectField(languageModelParam, word1FieldId));
        if (!word1) {
            env->DeleteLocalRef(languageModelParam);
            continue;
        }
        const jsize word1Length = env->GetArrayLength(word1);
        int word1CodePoints[word1Length];
        env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
        const jint unigramProbability =
                env->GetIntField(languageModelParam, unigramProbabilityFieldId);
        dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability);

        // word0 may be null, meaning the param carries no bigram information. The buffer
        // for it is only created when it is actually needed.
        jintArray word0 = static_cast<jintArray>(
                env->GetObjectField(languageModelParam, word0FieldId));
        if (word0) {
            const jsize word0Length = env->GetArrayLength(word0);
            int word0CodePoints[word0Length];
            env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
            const jint bigramProbability =
                    env->GetIntField(languageModelParam, bigramProbabilityFieldId);
            dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints,
                    word1Length, bigramProbability);
            env->DeleteLocalRef(word0);
        }
        // Release the per-entry local references before either looping or returning so the
        // local reference table does not grow with the array size.
        env->DeleteLocalRef(word1);
        env->DeleteLocalRef(languageModelParam);
        if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
            // Hand control back to the caller so it can run GC and resume from i + 1.
            return i + 1;
        }
    }
    return languageModelParamCount;
}
static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz, static int latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
jlong dict, jint unigramProbability, jint bigramProbability) { jlong dict, jint unigramProbability, jint bigramProbability) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
@ -418,6 +472,12 @@ static const JNINativeMethod sMethods[] = {
const_cast<char *>("(J[I[I)V"), const_cast<char *>("(J[I[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
}, },
{
const_cast<char *>("addMultipleDictionaryEntriesNative"),
const_cast<char *>(
"(J[Lcom/android/inputmethod/latin/BinaryDictionary$LanguageModelParam;I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
},
{ {
const_cast<char *>("calculateProbabilityNative"), const_cast<char *>("calculateProbabilityNative"),
const_cast<char *>("(JII)I"), const_cast<char *>("(JII)I"),

View File

@ -21,6 +21,7 @@ import android.test.suitebuilder.annotation.LargeTest;
import android.text.TextUtils; import android.text.TextUtils;
import android.util.Pair; import android.util.Pair;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.CodePointUtils;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
@ -33,6 +34,7 @@ import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
// TODO Use the seed passed as an argument for makedict test.
@LargeTest @LargeTest
public class BinaryDictionaryTests extends AndroidTestCase { public class BinaryDictionaryTests extends AndroidTestCase {
private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
@ -776,4 +778,66 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete(); dictFile.delete();
} }
/**
 * Runs the bulk-add test once for dictionary format version 3 and once for version 4.
 */
public void testAddMultipleDictionaryEntries() {
    final int[] formatVersions = { 3, 4 };
    for (final int formatVersion : formatVersions) {
        testAddMultipleDictionaryEntries(formatVersion);
    }
}
/**
 * Builds a random sequence of unigram-only and unigram+bigram params, adds them to an empty
 * dictionary with a single addMultipleDictionaryEntries call, and checks that the dictionary
 * reports the expected unigram and bigram probabilities.
 *
 * @param formatVersion the dictionary format version to create the test dictionary with.
 */
private void testAddMultipleDictionaryEntries(final int formatVersion) {
    final int codePointSetSize = 20;
    final int lmParamCount = 1000;
    final double bigramContinueRate = 0.9;

    final long seed = System.currentTimeMillis();
    final Random random = new Random(seed);
    // The input is random, so every assertion carries the seed needed to reproduce it.
    final String failureContext = "formatVersion=" + formatVersion + ", seed=" + seed;

    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    }

    final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
    final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
    final HashMap<Pair<String, String>, Integer> bigramProbabilities =
            new HashMap<Pair<String, String>, Integer>();

    final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
    // Non-null while the next generated word should also form a bigram with this word.
    String prevWord = null;
    for (int i = 0; i < languageModelParams.length; i++) {
        final String word = CodePointUtils.generateWord(random, codePointSet);
        final int probability = random.nextInt(0xFF);
        final int bigramProbability = random.nextInt(0xF);
        unigramProbabilities.put(word, probability);
        if (prevWord == null) {
            languageModelParams[i] = new LanguageModelParam(word, probability);
        } else {
            languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
                    bigramProbability);
            bigramProbabilities.put(new Pair<String, String>(prevWord, word),
                    bigramProbability);
        }
        prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
    }

    final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    try {
        binaryDictionary.addMultipleDictionaryEntries(languageModelParams);

        for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
            assertEquals(failureContext, (int)entry.getValue(),
                    binaryDictionary.getFrequency(entry.getKey()));
        }

        for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
            final String word0 = entry.getKey().first;
            final String word1 = entry.getKey().second;
            final int unigramProbability = unigramProbabilities.get(word1);
            final int bigramProbability = entry.getValue();
            final int probability = binaryDictionary.calculateProbability(
                    unigramProbability, bigramProbability);
            assertEquals(failureContext, probability,
                    binaryDictionary.getBigramProbability(word0, word1));
        }
    } finally {
        // Release the native dictionary and remove the temporary file even when an
        // assertion fails, so later tests start from a clean state.
        binaryDictionary.close();
        dictFile.delete();
    }
}
} }