Merge "Extend jni methods and enable Beginning-of-Sentence."

This commit is contained in:
Keisuke Kuroyanagi 2014-05-23 11:01:04 +00:00 committed by Android (Google) Code Review
commit 662c22759b
9 changed files with 186 additions and 62 deletions

View file

@ -191,7 +191,8 @@ public final class BinaryDictionary extends Dictionary {
private static native void closeNative(long dict);
private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
private static native int getBigramProbabilityNative(long dict, int[] word0,
boolean isBeginningOfSentence, int[] word1);
private static native void getWordPropertyNative(long dict, int[] word,
int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
@ -200,15 +201,17 @@ public final class BinaryDictionary extends Dictionary {
private static native void getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints,
int[] outputScores, int[] outputIndices, int[] outputTypes,
int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight);
int[] prevWordCodePointArray, boolean isBeginningOfSentence,
int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
float[] inOutLanguageWeight);
private static native void addUnigramWordNative(long dict, int[] word, int probability,
int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
boolean isBlacklisted, int timestamp);
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
int probability, int timestamp);
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
boolean isNotAWord, boolean isBlacklisted, int timestamp);
private static native void addBigramWordsNative(long dict, int[] word0,
boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
private static native void removeBigramWordsNative(long dict, int[] word0,
boolean isBeginningOfSentence, int[] word1);
private static native int addMultipleDictionaryEntriesNative(long dict,
LanguageModelParam[] languageModelParams, int startIndex);
private static native String getPropertyNative(long dict, String query);
@ -301,7 +304,8 @@ public final class BinaryDictionary extends Dictionary {
getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
inputPointers.getYCoordinates(), inputPointers.getTimes(),
inputPointers.getPointerIds(), mInputCodePoints, inputSize,
mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount,
mNativeSuggestOptions.getOptions(), prevWordCodePointArray,
prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount,
mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes,
mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight);
if (inOutLanguageWeight != null) {
@ -364,12 +368,13 @@ public final class BinaryDictionary extends Dictionary {
}
public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return NOT_A_PROBABILITY;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
return getBigramProbabilityNative(mNativeDict, codePoints0,
prevWordsInfo.mIsBeginningOfSentence, codePoints1);
}
public WordProperty getWordProperty(final String word) {
@ -420,16 +425,17 @@ public final class BinaryDictionary extends Dictionary {
// Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramEntry(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
final String shortcutTarget, final int shortcutProbability,
final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
if (TextUtils.isEmpty(word)) {
if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return;
}
final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null;
addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
shortcutProbability, isNotAWord, isBlacklisted, timestamp);
shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp);
mHasUpdated = true;
}
@ -437,23 +443,25 @@ public final class BinaryDictionary extends Dictionary {
public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
final int probability,
final int timestamp) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
codePoints1, probability, timestamp);
mHasUpdated = true;
}
// Remove an n-gram entry from the binary dictionary in native code.
public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return;
}
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word);
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
codePoints1);
mHasUpdated = true;
}

View file

@ -292,7 +292,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) {
mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
isNotAWord, isBlacklisted, timestamp);
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp);
}
/**

View file

@ -20,6 +20,8 @@ import android.util.Log;
// TODO: Support multiple previous words for n-gram.
public class PrevWordsInfo {
public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
// The previous word. May be null after resetting and before starting a new composing word, or
// when there is no context like at the start of text for example. It can also be set to null
// externally when the user enters a separator that does not let bigrams across, like a period
@ -32,7 +34,7 @@ public class PrevWordsInfo {
// Beginning of sentence.
public PrevWordsInfo() {
mPrevWord = null;
mPrevWord = "";
mIsBeginningOfSentence = true;
}
@ -40,4 +42,8 @@ public class PrevWordsInfo {
mPrevWord = prevWord;
mIsBeginningOfSentence = false;
}
public boolean isValid() {
return mPrevWord != null;
}
}

View file

@ -35,6 +35,8 @@ public final class WordProperty implements Comparable<WordProperty> {
public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams;
// TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts;
@ -51,6 +53,7 @@ public final class WordProperty implements Comparable<WordProperty> {
mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = bigrams != null && !bigrams.isEmpty();
@ -77,6 +80,7 @@ public final class WordProperty implements Comparable<WordProperty> {
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = CollectionUtils.newArrayList();
mBigrams = CollectionUtils.newArrayList();
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts;

View file

@ -178,10 +178,10 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount,
jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
jfloatArray inOutLanguageWeight) {
jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
jintArray outSpaceIndicesArray, jintArray outTypesArray,
jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@ -274,7 +274,7 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
}
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
jlong dict, jintArray word0, jintArray word1) {
jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0);
@ -283,7 +283,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
}
@ -326,7 +326,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@ -341,13 +342,14 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
}
// Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1, jint probability, jint timestamp) {
jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@ -363,13 +365,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
// Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
timestamp, 0 /* level */, 1 /* count */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1) {
jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) {
return;
@ -380,8 +381,7 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
false /* isBeginningOfSentence */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
}
@ -625,7 +625,7 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
},
{
@ -635,7 +635,7 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("getBigramProbabilityNative"),
const_cast<char *>("(J[I[I)I"),
const_cast<char *>("(J[IZ[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
},
{
@ -651,17 +651,17 @@ static const JNINativeMethod sMethods[] = {
},
{
const_cast<char *>("addUnigramWordNative"),
const_cast<char *>("(J[II[IIZZI)V"),
const_cast<char *>("(J[II[IIZZZI)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
},
{
const_cast<char *>("addBigramWordsNative"),
const_cast<char *>("(J[I[III)V"),
const_cast<char *>("(J[IZ[III)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
},
{
const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[I[I)V"),
const_cast<char *>("(J[IZ[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
},
{

View file

@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners
}
} else {
mValueStack.back() += 1;
if (ptNodeParams->isTerminal()) {
if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
mValidUnigramCount += 1;
}
}

View file

@ -63,12 +63,16 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
super.tearDown();
}
private static boolean supportsBeginningOfSentence(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
}
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */,
mCurrentTime /* timestamp */);
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, mCurrentTime /* timestamp */);
}
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -631,4 +635,57 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.close();
dictFile.delete();
}
public void testBeginningOfSentence() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
if (supportsBeginningOfSentence(formatVersion)) {
testBeginningOfSentence(formatVersion);
}
}
}
private void testBeginningOfSentence(final int formatVersion) {
setCurrentTimeForTestMode(mCurrentTime);
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
mCurrentTime);
final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
forcePassingLongTime(binaryDictionary);
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
binaryDictionary.close();
dictFile.delete();
}
}

View file

@ -50,6 +50,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
return formatVersion >= FormatSpec.VERSION4_DEV;
}
private static boolean supportsBeginningOfSentence(final int formatVersion) {
return formatVersion >= FormatSpec.VERSION4_DEV;
}
private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException {
if (formatVersion == FormatSpec.VERSION4
@ -171,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
10 /* shortcutProbability */, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200;
@ -192,8 +197,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
}
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -1010,7 +1015,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC();
}
@ -1188,24 +1194,24 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100;
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
shortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
updatedShortcutProbability, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability);
@ -1275,8 +1281,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = words.get(random.nextInt(words.size()));
final int unigramProbability = unigramProbabilities.get(word);
binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@ -1331,10 +1337,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */,
true /* isBlacklisted */, 0 /* timestamp */);
Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
@ -1434,4 +1441,46 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(bigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
}
public void testBeginningOfSentence() {
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
if (supportsBeginningOfSentence(formatVersion)) {
testBeginningOfSentence(formatVersion);
}
}
}
private void testBeginningOfSentence(final int formatVersion) {
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int dummyProbability = 0;
binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
final int bigramProbability = 200;
addUnigramWord(binaryDictionary, "aaa", dummyProbability);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
assertEquals(bigramProbability,
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
addUnigramWord(binaryDictionary, "bbb", dummyProbability);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
binaryDictionary.flushWithGC();
assertEquals(bigramProbability,
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
assertEquals(bigramProbability,
binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
}
}

View file

@ -77,14 +77,14 @@ public class Ver4DictEncoder implements DictEncoder {
if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */,
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
0 /* timestamp */);
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
} else {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(),
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
0 /* timestamp */);
wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
}
}
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {