From 88fa47a27d45f6460971d0d223aa558e121b3478 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 24 Jun 2014 12:37:07 +0900 Subject: [PATCH] Support migration/dump of Beginning-of-Sentence entries. Bug: 14119293 Change-Id: Ie975138f819794d5c34a7a547be5a6117050e084 --- .../inputmethod/latin/BinaryDictionary.java | 33 ++++++++------ .../latin/makedict/WordProperty.java | 6 +-- .../latin/utils/CombinedFormatUtils.java | 4 ++ ...oid_inputmethod_latin_BinaryDictionary.cpp | 43 ++++++++++++++----- .../dictionary/property/word_property.cpp | 3 +- native/jni/src/utils/char_utils.h | 4 ++ native/jni/src/utils/jni_data_utils.h | 24 ++++++++--- .../latin/BinaryDictionaryTests.java | 37 +++++++++++----- .../BinaryDictDecoderEncoderTests.java | 3 +- 9 files changed, 111 insertions(+), 46 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 42105e2c3..335e52fef 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -64,11 +64,12 @@ public final class BinaryDictionary extends Dictionary { public static final int NOT_A_VALID_TIMESTAMP = -1; // Format to get unigram flags from native side via getWordPropertyNative(). - private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 4; + private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; private static final int FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX = 1; private static final int FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX = 2; private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; + private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; // Format to get probability and historical info from native side via getWordPropertyNative(). public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; @@ -176,10 +177,12 @@ public final class BinaryDictionary extends Dictionary { private static native int getBigramProbabilityNative(long dict, int[] word0, boolean isBeginningOfSentence, int[] word1); private static native void getWordPropertyNative(long dict, int[] word, - int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, - ArrayList outBigramTargets, ArrayList outBigramProbabilityInfo, - ArrayList outShortcutTargets, ArrayList outShortcutProbabilities); - private static native int getNextWordNative(long dict, int token, int[] outCodePoints); + boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, + int[] outProbabilityInfo, ArrayList outBigramTargets, + ArrayList outBigramProbabilityInfo, ArrayList outShortcutTargets, + ArrayList outShortcutProbabilities); + private static native int getNextWordNative(long dict, int token, int[] outCodePoints, + boolean[] outIsBeginningOfSentence); private static native void getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, @@ -358,8 +361,8 @@ public final class BinaryDictionary extends Dictionary { prevWordsInfo.mIsBeginningOfSentence, codePoints1); } - public WordProperty getWordProperty(final String word) { - if (TextUtils.isEmpty(word)) { + public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { + if (word == null) { return null; } final int[] codePoints = StringUtils.toCodePointArray(word); @@ -371,14 +374,15 @@ public final class BinaryDictionary extends Dictionary { final ArrayList outBigramProbabilityInfo = new ArrayList<>(); final ArrayList outShortcutTargets = new ArrayList<>(); final ArrayList outShortcutProbabilities = new ArrayList<>(); - getWordPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbabilityInfo, - outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, - outShortcutProbabilities); + getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, + outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, + outShortcutTargets, outShortcutProbabilities); return new WordProperty(codePoints, outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], outFlags[FORMAT_WORD_PROPERTY_IS_BLACKLISTED_INDEX], outFlags[FORMAT_WORD_PROPERTY_HAS_BIGRAMS_INDEX], - outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], outProbabilityInfo, + outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], + outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); } @@ -399,9 +403,12 @@ public final class BinaryDictionary extends Dictionary { */ public GetNextWordPropertyResult getNextWordProperty(final int token) { final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; - final int nextToken = getNextWordNative(mNativeDict, token, codePoints); + final boolean[] isBeginningOfSentence = new boolean[1]; + final int nextToken = getNextWordNative(mNativeDict, token, codePoints, + isBeginningOfSentence); final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); - return new GetNextWordPropertyResult(getWordProperty(word), nextToken); + return new GetNextWordPropertyResult( + getWordProperty(word, isBeginningOfSentence[0]), nextToken); } // Add a unigram entry to binary dictionary with unigram attributes in native code. diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index 31cb59756..cd78e2235 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -70,8 +70,8 @@ public final class WordProperty implements Comparable { // Construct word property using information from native code. // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. public WordProperty(final int[] codePoints, final boolean isNotAWord, - final boolean isBlacklisted, final boolean hasBigram, - final boolean hasShortcuts, final int[] probabilityInfo, + final boolean isBlacklisted, final boolean hasBigram, final boolean hasShortcuts, + final boolean isBeginningOfSentence, final int[] probabilityInfo, final ArrayList bigramTargets, final ArrayList bigramProbabilityInfo, final ArrayList shortcutTargets, final ArrayList shortcutProbabilities) { @@ -79,7 +79,7 @@ public final class WordProperty implements Comparable { mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mShortcutTargets = new ArrayList<>(); mBigrams = new ArrayList<>(); - mIsBeginningOfSentence = false; + mIsBeginningOfSentence = isBeginningOfSentence; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklisted; mHasShortcuts = hasShortcuts; diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java index c66007537..34f59e8bc 100644 --- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -31,6 +31,7 @@ public class CombinedFormatUtils { public static final String HISTORICAL_INFO_TAG = "historicalInfo"; public static final String HISTORICAL_INFO_SEPARATOR = ":"; public static final String WORD_TAG = "word"; + public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence"; public static final String NOT_A_WORD_TAG = "not_a_word"; public static final String BLACKLISTED_TAG = "blacklisted"; @@ -56,6 +57,9 @@ public class CombinedFormatUtils { builder.append(" " + WORD_TAG + "=" + wordProperty.mWord); builder.append(","); builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo)); + if (wordProperty.mIsBeginningOfSentence) { + builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true"); + } if (wordProperty.mIsNotAWord) { builder.append("," + NOT_A_WORD_TAG + "=true"); } diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index c2cd2addd..2654a4a0a 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -301,7 +301,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c // If token is 0, this method newly starts iterating the dictionary. This method returns 0 when // the dictionary does not have a next word. static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, - jlong dict, jint token, jintArray outCodePoints) { + jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) return 0; const jsize codePointBufSize = env->GetArrayLength(outCodePoints); @@ -317,19 +317,39 @@ static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount, false /* needsNullTermination */); + bool isBeginningOfSentence = false; + if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { + isBeginningOfSentence = true; + } + JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */, + isBeginningOfSentence); return nextToken; } static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, - jlong dict, jintArray word, jintArray outCodePoints, jbooleanArray outFlags, - jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilityInfo, - jobject outShortcutTargets, jobject outShortcutProbabilities) { + jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints, + jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, + jobject outBigramProbabilityInfo, jobject outShortcutTargets, + jobject outShortcutProbabilities) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) return; const jsize wordLength = env->GetArrayLength(word); - int wordCodePoints[wordLength]; + if (wordLength > MAX_WORD_LENGTH) { + AKLOGE("Invalid wordLength: %d", wordLength); + return; + } + int wordCodePoints[MAX_WORD_LENGTH]; env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); - const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordLength); + int codePointCount = wordLength; + if (isBeginningOfSentence) { + codePointCount = CharUtils::attachBeginningOfSentenceMarker( + wordCodePoints, wordLength, MAX_WORD_LENGTH); + if (codePointCount < 0) { + AKLOGE("Cannot attach Beginning-of-Sentence marker."); + return; + } + } + const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount); wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); @@ -554,7 +574,6 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } - // TODO: Migrate historical information. int wordCodePoints[MAX_WORD_LENGTH]; int wordCodePointCount = 0; int token = 0; @@ -563,6 +582,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, wordCodePointCount); + if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { + // Skip beginning-of-sentence unigram. + continue; + } if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); @@ -592,7 +615,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j } } const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount, - false /* isBeginningOfSentence */); + wordProperty.getUnigramProperty()->representsBeginningOfSentence()); for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, &bigramProperty)) { @@ -669,13 +692,13 @@ static const JNINativeMethod sMethods[] = { }, { const_cast("getWordPropertyNative"), - const_cast("(J[I[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" + const_cast("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"), reinterpret_cast(latinime_BinaryDictionary_getWordProperty) }, { const_cast("getNextWordNative"), - const_cast("(JI[I)I"), + const_cast("(JI[I[Z)I"), reinterpret_cast(latinime_BinaryDictionary_getNextWord) }, { diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.cpp b/native/jni/src/suggest/core/dictionary/property/word_property.cpp index 6f5f808f8..5bdd5606b 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.cpp +++ b/native/jni/src/suggest/core/dictionary/property/word_property.cpp @@ -28,7 +28,8 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints, MAX_WORD_LENGTH /* maxLength */, mCodePoints.data(), mCodePoints.size(), false /* needsNullTermination */); jboolean flags[] = {mUnigramProperty.isNotAWord(), mUnigramProperty.isBlacklisted(), - !mBigrams.empty(), mUnigramProperty.hasShortcuts()}; + !mBigrams.empty(), mUnigramProperty.hasShortcuts(), + mUnigramProperty.representsBeginningOfSentence()}; env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(), mUnigramProperty.getLevel(), mUnigramProperty.getCount()}; diff --git a/native/jni/src/utils/char_utils.h b/native/jni/src/utils/char_utils.h index f28ed5682..63786502b 100644 --- a/native/jni/src/utils/char_utils.h +++ b/native/jni/src/utils/char_utils.h @@ -98,6 +98,10 @@ class CharUtils { // Beginning-of-Sentence. static AK_FORCE_INLINE int attachBeginningOfSentenceMarker(int *const codePoints, const int codePointCount, const int maxCodePoint) { + if (codePointCount > 0 && codePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { + // Marker has already been attached. + return codePointCount; + } if (codePointCount >= maxCodePoint) { // the code points cannot be marked as a Beginning-of-Sentence. return 0; diff --git a/native/jni/src/utils/jni_data_utils.h b/native/jni/src/utils/jni_data_utils.h index 67a66fdfe..3514aeeb0 100644 --- a/native/jni/src/utils/jni_data_utils.h +++ b/native/jni/src/utils/jni_data_utils.h @@ -69,18 +69,23 @@ class JniDataUtils { static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, const int maxLength, const int *const codePoints, const int codePointCount, const bool needsNullTermination) { - const int outputCodePointCount = std::min(maxLength, codePointCount); - int outputCodePonts[outputCodePointCount]; - for (int i = 0; i < outputCodePointCount; ++i) { + const int codePointBufSize = std::min(maxLength, codePointCount); + int outputCodePonts[codePointBufSize]; + int outputCodePointCount = 0; + for (int i = 0; i < codePointBufSize; ++i) { const int codePoint = codePoints[i]; + int codePointToOutput = codePoint; if (!CharUtils::isInUnicodeSpace(codePoint)) { - outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER; + if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { + // Just skip Beginning-of-Sentence marker. + continue; + } + codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; } else if (codePoint >= 0x01 && codePoint <= 0x1F) { // Control code. - outputCodePonts[i] = CODE_POINT_REPLACEMENT_CHARACTER; - } else { - outputCodePonts[i] = codePoint; + codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; } + outputCodePonts[outputCodePointCount++] = codePointToOutput; } env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, outputCodePonts); @@ -90,6 +95,11 @@ class JniDataUtils { } } + static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, + const jboolean value) { + env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); + } + static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { env->SetIntArrayRegion(array, index, 1 /* len */, &value); } diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 160b08c4f..83ea19399 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -994,7 +994,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", + false /* isBeginningOfSentence */); assertFalse(invalidWordProperty.isValid()); final ArrayList words = new ArrayList<>(); @@ -1017,7 +1018,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { } words.add(word); wordProbabilities.put(word, unigramProbability); - final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); assertEquals(word, wordProperty.mWord); assertTrue(wordProperty.isValid()); assertEquals(isNotAWord, wordProperty.mIsNotAWord); @@ -1057,7 +1059,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { continue; } final HashSet bigramWord1s = bigrams.get(word0); - final WordProperty wordProperty = binaryDictionary.getWordProperty(word0); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, + false /* isBeginningOfSentence */); assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); for (int j = 0; j < wordProperty.mBigrams.size(); j++) { final String word1 = wordProperty.mBigrams.get(j).mWord; @@ -1094,7 +1097,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); + final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", + false /* isBeginningOfSentence */); assertFalse(invalidWordProperty.isValid()); final ArrayList words = new ArrayList<>(); @@ -1188,7 +1192,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); + WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); @@ -1196,7 +1201,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", updatedShortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); - wordProperty = binaryDictionary.getWordProperty("aaa"); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(updatedShortcutProbability, @@ -1207,7 +1213,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { final HashMap shortcutTargets = new HashMap<>(); shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); - wordProperty = binaryDictionary.getWordProperty("aaa"); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); assertEquals(2, wordProperty.mShortcutTargets.size()); for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); @@ -1218,7 +1225,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("yyy", shortcutProbability); binaryDictionary.flushWithGC(); - wordProperty = binaryDictionary.getWordProperty("aaa"); + wordProperty = binaryDictionary.getWordProperty("aaa", + false /* isBeginningOfSentence */); assertEquals(2, wordProperty.mShortcutTargets.size()); for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); @@ -1288,7 +1296,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { } for (final String word : words) { - final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); assertEquals((int)unigramProbabilities.get(word), wordProperty.mProbabilityInfo.mProbability); if (!shortcutTargets.containsKey(word)) { @@ -1332,6 +1341,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); + binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, + "aaa", bigramProbability, 0 /* timestamp */); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); @@ -1343,12 +1354,16 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); if (canCheckBigramProbability(toFormatVersion)) { assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); + assertEquals(bigramProbability, binaryDictionary.getNgramProbability( + PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); } assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); - WordProperty wordProperty = binaryDictionary.getWordProperty("ccc"); + WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", + false /* isBeginningOfSentence */); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); - wordProperty = binaryDictionary.getWordProperty("ddd"); + wordProperty = binaryDictionary.getWordProperty("ddd", + false /* isBeginningOfSentence */); assertTrue(wordProperty.mIsBlacklistEntry); assertTrue(wordProperty.mIsNotAWord); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 4b332ca84..406046a74 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -614,7 +614,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 0 /* offset */, file.length(), true /* useFullEditDistance */, Locale.ENGLISH, dictName, false /* isUpdatable */); for (final String word : words) { - final WordProperty wordProperty = binaryDictionary.getWordProperty(word); + final WordProperty wordProperty = binaryDictionary.getWordProperty(word, + false /* isBeginningOfSentence */); assertEquals(word, wordProperty.mWord); assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); if (shortcuts.containsKey(word)) {