Extend JNI methods and enable Beginning-of-Sentence.

Bug: 14119293
Change-Id: I78fc877367dd0d6240eeacb750b6d2d0b93cba83
main
Keisuke Kuroyanagi 2014-05-23 19:58:58 +09:00
parent 47ea762fb2
commit 1adca93381
9 changed files with 186 additions and 62 deletions

View File

@ -191,7 +191,8 @@ public final class BinaryDictionary extends Dictionary {
private static native void closeNative(long dict); private static native void closeNative(long dict);
private static native int getFormatVersionNative(long dict); private static native int getFormatVersionNative(long dict);
private static native int getProbabilityNative(long dict, int[] word); private static native int getProbabilityNative(long dict, int[] word);
private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1); private static native int getBigramProbabilityNative(long dict, int[] word0,
boolean isBeginningOfSentence, int[] word1);
private static native void getWordPropertyNative(long dict, int[] word, private static native void getWordPropertyNative(long dict, int[] word,
int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo, int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo, ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
@ -200,15 +201,17 @@ public final class BinaryDictionary extends Dictionary {
private static native void getSuggestionsNative(long dict, long proximityInfo, private static native void getSuggestionsNative(long dict, long proximityInfo,
long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints, int[] prevWordCodePointArray, boolean isBeginningOfSentence,
int[] outputScores, int[] outputIndices, int[] outputTypes, int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight); int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
float[] inOutLanguageWeight);
private static native void addUnigramWordNative(long dict, int[] word, int probability, private static native void addUnigramWordNative(long dict, int[] word, int probability,
int[] shortcutTarget, int shortcutProbability, boolean isNotAWord, int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
boolean isBlacklisted, int timestamp); boolean isNotAWord, boolean isBlacklisted, int timestamp);
private static native void addBigramWordsNative(long dict, int[] word0, int[] word1, private static native void addBigramWordsNative(long dict, int[] word0,
int probability, int timestamp); boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1); private static native void removeBigramWordsNative(long dict, int[] word0,
boolean isBeginningOfSentence, int[] word1);
private static native int addMultipleDictionaryEntriesNative(long dict, private static native int addMultipleDictionaryEntriesNative(long dict,
LanguageModelParam[] languageModelParams, int startIndex); LanguageModelParam[] languageModelParams, int startIndex);
private static native String getPropertyNative(long dict, String query); private static native String getPropertyNative(long dict, String query);
@ -301,7 +304,8 @@ public final class BinaryDictionary extends Dictionary {
getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
inputPointers.getYCoordinates(), inputPointers.getTimes(), inputPointers.getYCoordinates(), inputPointers.getTimes(),
inputPointers.getPointerIds(), mInputCodePoints, inputSize, inputPointers.getPointerIds(), mInputCodePoints, inputSize,
mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount, mNativeSuggestOptions.getOptions(), prevWordCodePointArray,
prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount,
mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes, mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes,
mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight); mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight);
if (inOutLanguageWeight != null) { if (inOutLanguageWeight != null) {
@ -364,12 +368,13 @@ public final class BinaryDictionary extends Dictionary {
} }
public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) { public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word); final int[] codePoints1 = StringUtils.toCodePointArray(word);
return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1); return getBigramProbabilityNative(mNativeDict, codePoints0,
prevWordsInfo.mIsBeginningOfSentence, codePoints1);
} }
public WordProperty getWordProperty(final String word) { public WordProperty getWordProperty(final String word) {
@ -420,16 +425,17 @@ public final class BinaryDictionary extends Dictionary {
// Add a unigram entry to binary dictionary with unigram attributes in native code. // Add a unigram entry to binary dictionary with unigram attributes in native code.
public void addUnigramEntry(final String word, final int probability, public void addUnigramEntry(final String word, final int probability,
final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord, final String shortcutTarget, final int shortcutProbability,
final boolean isBeginningOfSentence, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) { final boolean isBlacklisted, final int timestamp) {
if (TextUtils.isEmpty(word)) { if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
return; return;
} }
final int[] codePoints = StringUtils.toCodePointArray(word); final int[] codePoints = StringUtils.toCodePointArray(word);
final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
StringUtils.toCodePointArray(shortcutTarget) : null; StringUtils.toCodePointArray(shortcutTarget) : null;
addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
shortcutProbability, isNotAWord, isBlacklisted, timestamp); shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp);
mHasUpdated = true; mHasUpdated = true;
} }
@ -437,23 +443,25 @@ public final class BinaryDictionary extends Dictionary {
public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word, public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
final int probability, final int probability,
final int timestamp) { final int timestamp) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return; return;
} }
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word); final int[] codePoints1 = StringUtils.toCodePointArray(word);
addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp); addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
codePoints1, probability, timestamp);
mHasUpdated = true; mHasUpdated = true;
} }
// Remove an n-gram entry from the binary dictionary in native code. // Remove an n-gram entry from the binary dictionary in native code.
public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) { public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) { if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
return; return;
} }
final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord); final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
final int[] codePoints1 = StringUtils.toCodePointArray(word); final int[] codePoints1 = StringUtils.toCodePointArray(word);
removeBigramWordsNative(mNativeDict, codePoints0, codePoints1); removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
codePoints1);
mHasUpdated = true; mHasUpdated = true;
} }

View File

@ -292,7 +292,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord, final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
final boolean isBlacklisted, final int timestamp) { final boolean isBlacklisted, final int timestamp) {
mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq, mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
isNotAWord, isBlacklisted, timestamp); false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp);
} }
/** /**

View File

@ -20,6 +20,8 @@ import android.util.Log;
// TODO: Support multiple previous words for n-gram. // TODO: Support multiple previous words for n-gram.
public class PrevWordsInfo { public class PrevWordsInfo {
public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
// The previous word. May be null after resetting and before starting a new composing word, or // The previous word. May be null after resetting and before starting a new composing word, or
// when there is no context like at the start of text for example. It can also be set to null // when there is no context like at the start of text for example. It can also be set to null
// externally when the user enters a separator that does not let bigrams across, like a period // externally when the user enters a separator that does not let bigrams across, like a period
@ -32,7 +34,7 @@ public class PrevWordsInfo {
// Beginning of sentence. // Beginning of sentence.
public PrevWordsInfo() { public PrevWordsInfo() {
mPrevWord = null; mPrevWord = "";
mIsBeginningOfSentence = true; mIsBeginningOfSentence = true;
} }
@ -40,4 +42,8 @@ public class PrevWordsInfo {
mPrevWord = prevWord; mPrevWord = prevWord;
mIsBeginningOfSentence = false; mIsBeginningOfSentence = false;
} }
/**
 * Tells whether a previous word has been set on this instance.
 *
 * @return {@code true} when {@code mPrevWord} is not {@code null}, {@code false} otherwise.
 */
public boolean isValid() {
    final boolean hasPrevWord = (null != mPrevWord);
    return hasPrevWord;
}
} }

View File

@ -35,6 +35,8 @@ public final class WordProperty implements Comparable<WordProperty> {
public final ProbabilityInfo mProbabilityInfo; public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams; public final ArrayList<WeightedString> mBigrams;
// TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord; public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry; public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts; public final boolean mHasShortcuts;
@ -51,6 +53,7 @@ public final class WordProperty implements Comparable<WordProperty> {
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = bigrams != null && !bigrams.isEmpty(); mHasBigrams = bigrams != null && !bigrams.isEmpty();
@ -77,6 +80,7 @@ public final class WordProperty implements Comparable<WordProperty> {
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = CollectionUtils.newArrayList(); mShortcutTargets = CollectionUtils.newArrayList();
mBigrams = CollectionUtils.newArrayList(); mBigrams = CollectionUtils.newArrayList();
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted; mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts; mHasShortcuts = hasShortcuts;

View File

@ -178,10 +178,10 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount, jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray, jintArray outSpaceIndicesArray, jintArray outTypesArray,
jfloatArray inOutLanguageWeight) { jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
// Assign 0 to outSuggestionCount here in case of returning earlier in this method. // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0); JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@ -274,7 +274,7 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
} }
static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz, static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
jlong dict, jintArray word0, jintArray word1) { jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return JNI_FALSE; if (!dictionary) return JNI_FALSE;
const jsize word0Length = env->GetArrayLength(word0); const jsize word0Length = env->GetArrayLength(word0);
@ -283,7 +283,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
int word1CodePoints[word1Length]; int word1CodePoints[word1Length];
env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */); const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length); return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
} }
@ -326,7 +326,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) { jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
return; return;
@ -341,13 +342,14 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
} }
// Use 1 for count to indicate the word has inputted. // Use 1 for count to indicate the word has inputted.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
} }
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1, jint probability, jint timestamp) { jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
jint timestamp) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
return; return;
@ -363,13 +365,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
// Use 1 for count to indicate the bigram has inputted. // Use 1 for count to indicate the bigram has inputted.
const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
timestamp, 0 /* level */, 1 /* count */); timestamp, 0 /* level */, 1 /* count */);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
false /* isBeginningOfSentence */);
dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
} }
static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
jintArray word0, jintArray word1) { jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) { if (!dictionary) {
return; return;
@ -380,8 +381,7 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
jsize word1Length = env->GetArrayLength(word1); jsize word1Length = env->GetArrayLength(word1);
int word1CodePoints[word1Length]; int word1CodePoints[word1Length];
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
false /* isBeginningOfSentence */);
dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length); dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
} }
@ -625,7 +625,7 @@ static const JNINativeMethod sMethods[] = {
}, },
{ {
const_cast<char *>("getSuggestionsNative"), const_cast<char *>("getSuggestionsNative"),
const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"), const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
}, },
{ {
@ -635,7 +635,7 @@ static const JNINativeMethod sMethods[] = {
}, },
{ {
const_cast<char *>("getBigramProbabilityNative"), const_cast<char *>("getBigramProbabilityNative"),
const_cast<char *>("(J[I[I)I"), const_cast<char *>("(J[IZ[I)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability) reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
}, },
{ {
@ -651,17 +651,17 @@ static const JNINativeMethod sMethods[] = {
}, },
{ {
const_cast<char *>("addUnigramWordNative"), const_cast<char *>("addUnigramWordNative"),
const_cast<char *>("(J[II[IIZZI)V"), const_cast<char *>("(J[II[IIZZZI)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord) reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
}, },
{ {
const_cast<char *>("addBigramWordsNative"), const_cast<char *>("addBigramWordsNative"),
const_cast<char *>("(J[I[III)V"), const_cast<char *>("(J[IZ[III)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords) reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
}, },
{ {
const_cast<char *>("removeBigramWordsNative"), const_cast<char *>("removeBigramWordsNative"),
const_cast<char *>("(J[I[I)V"), const_cast<char *>("(J[IZ[I)V"),
reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords) reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
}, },
{ {

View File

@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners
} }
} else { } else {
mValueStack.back() += 1; mValueStack.back() += 1;
if (ptNodeParams->isTerminal()) { if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
mValidUnigramCount += 1; mValidUnigramCount += 1;
} }
} }

View File

@ -63,12 +63,16 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
super.tearDown(); super.tearDown();
} }
/**
 * Decides whether the beginning-of-sentence test is run for a dictionary format version.
 *
 * @param formatVersion the dictionary format version to check.
 * @return {@code true} when {@code formatVersion} is at least
 *         {@code FormatSpec.VERSION4_DEV}, {@code false} otherwise.
 */
private static boolean supportsBeginningOfSentence(final int formatVersion) {
    final int minimumSupportedVersion = FormatSpec.VERSION4_DEV;
    return minimumSupportedVersion <= formatVersion;
}
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) { final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isBeginningOfSentence */, false /* isNotAWord */,
mCurrentTime /* timestamp */); false /* isBlacklisted */, mCurrentTime /* timestamp */);
} }
private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -631,4 +635,57 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.close(); binaryDictionary.close();
dictFile.delete(); dictFile.delete();
} }
/**
 * Runs the beginning-of-sentence scenario once for every entry of
 * {@code DICT_FORMAT_VERSIONS} that {@code supportsBeginningOfSentence} accepts.
 */
public void testBeginningOfSentence() {
    for (final int version : DICT_FORMAT_VERSIONS) {
        if (!supportsBeginningOfSentence(version)) {
            // This format version is not covered by the scenario; move on to the next one.
            continue;
        }
        testBeginningOfSentence(version);
    }
}
/**
 * Exercises n-grams whose previous-word context is the beginning of a sentence, using a
 * freshly created dictionary of the given format version.
 *
 * The scenario adds such n-grams and expects them to be valid, calls
 * {@code forcePassingLongTime} and expects them to be invalid, then adds them again and
 * expects them to be valid once more.
 *
 * @param formatVersion the dictionary format version used to create the test dictionary.
 */
private void testBeginningOfSentence(final int formatVersion) {
setCurrentTimeForTestMode(mCurrentTime);
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
// Register the beginning-of-sentence entry itself: an empty word flagged as
// isBeginningOfSentence and isNotAWord.
binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
mCurrentTime);
final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
// Phase 1: add "aaa" and "bbb" as words following the beginning of a sentence.
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
// Add the same n-gram a second time; the assertions below expect it to remain valid.
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
// Phase 2: NOTE(review): forcePassingLongTime is defined elsewhere; presumably it moves the
// test clock far enough for entries to decay - confirm. Both n-grams are expected to be
// invalid afterwards.
forcePassingLongTime(binaryDictionary);
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
// Phase 3: adding the words and n-grams again is expected to make them valid again.
addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
mCurrentTime);
addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
mCurrentTime);
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
// Release the dictionary and remove the temporary dictionary file.
binaryDictionary.close();
dictFile.delete();
}
} }

View File

@ -50,6 +50,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
return formatVersion >= FormatSpec.VERSION4_DEV; return formatVersion >= FormatSpec.VERSION4_DEV;
} }
/**
 * Reports whether a dictionary format version passes the beginning-of-sentence version check.
 *
 * @param formatVersion the dictionary format version to check.
 * @return {@code true} when {@code formatVersion} is at least
 *         {@code FormatSpec.VERSION4_DEV}, {@code false} otherwise.
 */
private static boolean supportsBeginningOfSentence(final int formatVersion) {
    final int minimumSupportedVersion = FormatSpec.VERSION4_DEV;
    return minimumSupportedVersion <= formatVersion;
}
private File createEmptyDictionaryAndGetFile(final String dictId, private File createEmptyDictionaryAndGetFile(final String dictId,
final int formatVersion) throws IOException { final int formatVersion) throws IOException {
if (formatVersion == FormatSpec.VERSION4 if (formatVersion == FormatSpec.VERSION4
@ -171,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addUnigramWord(binaryDictionary, invalidLongWord, probability); addUnigramWord(binaryDictionary, invalidLongWord, probability);
// Too long short cut. // Too long short cut.
binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */, 10 /* shortcutProbability */, false /* isBeginningOfSentence */,
false /* isNotAWord */, false /* isBlacklisted */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP); BinaryDictionary.NOT_A_VALID_TIMESTAMP);
addUnigramWord(binaryDictionary, "abc", probability); addUnigramWord(binaryDictionary, "abc", probability);
final int updatedProbability = 200; final int updatedProbability = 200;
@ -192,8 +197,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int probability) { final int probability) {
binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
false /* isNotAWord */, false /* isBlacklisted */, false /* isBeginningOfSentence */, false /* isNotAWord */,
BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
} }
private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@ -1010,7 +1015,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
// TODO: Add tests for historical info. // TODO: Add tests for historical info.
binaryDictionary.addUnigramEntry(word, unigramProbability, binaryDictionary.addUnigramEntry(word, unigramProbability,
null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP); false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
binaryDictionary.flushWithGC(); binaryDictionary.flushWithGC();
} }
@ -1188,24 +1194,24 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int unigramProbability = 100; final int unigramProbability = 100;
final int shortcutProbability = 10; final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, shortcutProbability, false /* isBeginningOfSentence */,
0 /* timestamp */); false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
final int updatedShortcutProbability = 2; final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, updatedShortcutProbability, false /* isBeginningOfSentence */,
0 /* timestamp */); false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
wordProperty = binaryDictionary.getWordProperty("aaa"); wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability, assertEquals(updatedShortcutProbability,
wordProperty.mShortcutTargets.get(0).getProbability()); wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
0 /* timestamp */); false /* isBlacklisted */, 0 /* timestamp */);
final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>(); final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
shortcutTargets.put("zzz", updatedShortcutProbability); shortcutTargets.put("zzz", updatedShortcutProbability);
shortcutTargets.put("yyy", shortcutProbability); shortcutTargets.put("yyy", shortcutProbability);
@ -1275,8 +1281,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final String word = words.get(random.nextInt(words.size())); final String word = words.get(random.nextInt(words.size()));
final int unigramProbability = unigramProbabilities.get(word); final int unigramProbability = unigramProbabilities.get(word);
binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
0 /* timestamp */); false /* isBlacklisted */, 0 /* timestamp */);
if (shortcutTargets.containsKey(word)) { if (shortcutTargets.containsKey(word)) {
final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@ -1331,10 +1337,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
final int shortcutProbability = 10; final int shortcutProbability = 10;
binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); false /* isBeginningOfSentence */, false /* isNotAWord */,
false /* isBlacklisted */, 0 /* timestamp */);
binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */, Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
true /* isBlacklisted */, 0 /* timestamp */); true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
@ -1434,4 +1441,46 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(bigramProbabilities.size(), Integer.parseInt( assertEquals(bigramProbabilities.size(), Integer.parseInt(
binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
} }
/**
 * Runs the beginning-of-sentence test once for each entry of DICT_FORMAT_VERSIONS that
 * supportsBeginningOfSentence() accepts; other format versions are skipped.
 */
public void testBeginningOfSentence() {
    for (final int formatVersion : DICT_FORMAT_VERSIONS) {
        if (!supportsBeginningOfSentence(formatVersion)) {
            // This format version is not exercised by the test.
            continue;
        }
        testBeginningOfSentence(formatVersion);
    }
}
/**
 * Checks that n-gram entries whose context is PrevWordsInfo.BEGINNING_OF_SENTENCE can be added
 * to, and read back from, a freshly created dictionary of the given format version.
 *
 * The probability of "aaa" is verified right after its entry is added; the probabilities of
 * both "aaa" and "bbb" are verified again after flushWithGC().
 *
 * @param formatVersion the format version used to create the empty test dictionary.
 */
private void testBeginningOfSentence(final int formatVersion) {
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException ioException) {
        fail("IOException while writing an initial dictionary : " + ioException);
    }
    final String dictPath = dictFile.getAbsolutePath();
    final long dictLength = dictFile.length();
    final BinaryDictionary dictionary = new BinaryDictionary(dictPath, 0 /* offset */,
            dictLength, true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
            true /* isUpdatable */);

    // Register the empty-string unigram flagged as beginning-of-sentence.
    final int unigramProbability = 0;
    dictionary.addUnigramEntry("", unigramProbability, "" /* shortcutTarget */,
            BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
            true /* isBeginningOfSentence */, true /* isNotAWord */,
            false /* isBlacklisted */,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);

    final PrevWordsInfo beginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
    final int expectedProbability = 200;

    // First target word: add it and read the n-gram probability back immediately.
    addUnigramWord(dictionary, "aaa", unigramProbability);
    dictionary.addNgramEntry(beginningOfSentence, "aaa", expectedProbability,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    assertEquals(expectedProbability,
            dictionary.getNgramProbability(beginningOfSentence, "aaa"));

    // Add the same n-gram a second time, then add a second target word.
    dictionary.addNgramEntry(beginningOfSentence, "aaa", expectedProbability,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    addUnigramWord(dictionary, "bbb", unigramProbability);
    dictionary.addNgramEntry(beginningOfSentence, "bbb", expectedProbability,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);

    // Both entries must still report the same probability after flushWithGC().
    dictionary.flushWithGC();
    for (final String targetWord : new String[] { "aaa", "bbb" }) {
        assertEquals(expectedProbability,
                dictionary.getNgramProbability(beginningOfSentence, targetWord));
    }
}
} }

View File

@ -77,14 +77,14 @@ public class Ver4DictEncoder implements DictEncoder {
if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) { if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
null /* shortcutTarget */, 0 /* shortcutProbability */, null /* shortcutTarget */, 0 /* shortcutProbability */,
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry, wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
0 /* timestamp */); wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
} else { } else {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(), binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
shortcutTarget.mWord, shortcutTarget.getProbability(), shortcutTarget.mWord, shortcutTarget.getProbability(),
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry, wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
0 /* timestamp */); wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
} }
} }
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) { if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {