Merge "Extend jni methods and enable Beginning-of-Sentence."
commit 662c22759b
9 changed files with 186 additions and 62 deletions
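The diff below threads a new isBeginningOfSentence flag from the Java BinaryDictionary wrapper through the JNI layer into the native dictionary, so n-grams whose context is the start of a sentence can be stored and queried. As a minimal usage sketch (not part of the commit; the dictionary file, locale string, and probability values are assumptions mirroring the tests near the end of this diff):

    // Assumes dictFile points at an updatable version-4 dictionary and dictLocale is its locale id.
    final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), dictLocale, true /* isUpdatable */);
    // Register the empty beginning-of-sentence entry; it is not a real word, so isNotAWord = true.
    binaryDictionary.addUnigramEntry("", 0 /* probability */, "" /* shortcutTarget */,
            BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
            true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    // Add "hello" as a sentence-initial n-gram target and read its probability back.
    final PrevWordsInfo bosContext = PrevWordsInfo.BEGINNING_OF_SENTENCE;
    binaryDictionary.addNgramEntry(bosContext, "hello", 150 /* probability */,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
    final int probability = binaryDictionary.getNgramProbability(bosContext, "hello");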
@@ -191,7 +191,8 @@ public final class BinaryDictionary extends Dictionary {
     private static native void closeNative(long dict);
     private static native int getFormatVersionNative(long dict);
     private static native int getProbabilityNative(long dict, int[] word);
-    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
+    private static native int getBigramProbabilityNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1);
     private static native void getWordPropertyNative(long dict, int[] word,
             int[] outCodePoints, boolean[] outFlags, int[] outProbabilityInfo,
             ArrayList<int[]> outBigramTargets, ArrayList<int[]> outBigramProbabilityInfo,
@@ -200,15 +201,17 @@ public final class BinaryDictionary extends Dictionary {
     private static native void getSuggestionsNative(long dict, long proximityInfo,
             long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
             int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
-            int[] prevWordCodePointArray, int[] outputSuggestionCount, int[] outputCodePoints,
-            int[] outputScores, int[] outputIndices, int[] outputTypes,
-            int[] outputAutoCommitFirstWordConfidence, float[] inOutLanguageWeight);
+            int[] prevWordCodePointArray, boolean isBeginningOfSentence,
+            int[] outputSuggestionCount, int[] outputCodePoints, int[] outputScores,
+            int[] outputIndices, int[] outputTypes, int[] outputAutoCommitFirstWordConfidence,
+            float[] inOutLanguageWeight);
     private static native void addUnigramWordNative(long dict, int[] word, int probability,
-            int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
-            boolean isBlacklisted, int timestamp);
-    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
-            int probability, int timestamp);
-    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
+            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
+            boolean isNotAWord, boolean isBlacklisted, int timestamp);
+    private static native void addBigramWordsNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);
+    private static native void removeBigramWordsNative(long dict, int[] word0,
+            boolean isBeginningOfSentence, int[] word1);
     private static native int addMultipleDictionaryEntriesNative(long dict,
             LanguageModelParam[] languageModelParams, int startIndex);
     private static native String getPropertyNative(long dict, String query);
@@ -301,7 +304,8 @@ public final class BinaryDictionary extends Dictionary {
                 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
                 inputPointers.getYCoordinates(), inputPointers.getTimes(),
                 inputPointers.getPointerIds(), mInputCodePoints, inputSize,
-                mNativeSuggestOptions.getOptions(), prevWordCodePointArray, mOutputSuggestionCount,
+                mNativeSuggestOptions.getOptions(), prevWordCodePointArray,
+                prevWordsInfo.mIsBeginningOfSentence, mOutputSuggestionCount,
                 mOutputCodePoints, mOutputScores, mSpaceIndices, mOutputTypes,
                 mOutputAutoCommitFirstWordConfidence, mInputOutputLanguageWeight);
         if (inOutLanguageWeight != null) {
@@ -364,12 +368,13 @@ public final class BinaryDictionary extends Dictionary {
     }

     public int getNgramProbability(final PrevWordsInfo prevWordsInfo, final String word) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
             return NOT_A_PROBABILITY;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
+        return getBigramProbabilityNative(mNativeDict, codePoints0,
+                prevWordsInfo.mIsBeginningOfSentence, codePoints1);
     }

     public WordProperty getWordProperty(final String word) {
@@ -420,16 +425,17 @@ public final class BinaryDictionary extends Dictionary {

     // Add a unigram entry to binary dictionary with unigram attributes in native code.
     public void addUnigramEntry(final String word, final int probability,
-            final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
+            final String shortcutTarget, final int shortcutProbability,
+            final boolean isBeginningOfSentence, final boolean isNotAWord,
             final boolean isBlacklisted, final int timestamp) {
-        if (TextUtils.isEmpty(word)) {
+        if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
             return;
         }
         final int[] codePoints = StringUtils.toCodePointArray(word);
         final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
                 StringUtils.toCodePointArray(shortcutTarget) : null;
         addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
-                shortcutProbability, isNotAWord, isBlacklisted, timestamp);
+                shortcutProbability, isBeginningOfSentence, isNotAWord, isBlacklisted, timestamp);
         mHasUpdated = true;
     }

@@ -437,23 +443,25 @@ public final class BinaryDictionary extends Dictionary {
     public void addNgramEntry(final PrevWordsInfo prevWordsInfo, final String word,
             final int probability,
             final int timestamp) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
            return;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
+        addBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+                codePoints1, probability, timestamp);
         mHasUpdated = true;
     }

     // Remove an n-gram entry from the binary dictionary in native code.
     public void removeNgramEntry(final PrevWordsInfo prevWordsInfo, final String word) {
-        if (TextUtils.isEmpty(prevWordsInfo.mPrevWord) || TextUtils.isEmpty(word)) {
+        if (!prevWordsInfo.isValid() || TextUtils.isEmpty(word)) {
            return;
         }
         final int[] codePoints0 = StringUtils.toCodePointArray(prevWordsInfo.mPrevWord);
         final int[] codePoints1 = StringUtils.toCodePointArray(word);
-        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
+        removeBigramWordsNative(mNativeDict, codePoints0, prevWordsInfo.mIsBeginningOfSentence,
+                codePoints1);
         mHasUpdated = true;
     }

@@ -292,7 +292,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
             final String shortcutTarget, final int shortcutFreq, final boolean isNotAWord,
             final boolean isBlacklisted, final int timestamp) {
         mBinaryDictionary.addUnigramEntry(word, frequency, shortcutTarget, shortcutFreq,
-                isNotAWord, isBlacklisted, timestamp);
+                false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, timestamp);
     }

     /**

@@ -20,6 +20,8 @@ import android.util.Log;

 // TODO: Support multiple previous words for n-gram.
 public class PrevWordsInfo {
+    public static final PrevWordsInfo BEGINNING_OF_SENTENCE = new PrevWordsInfo();
+
     // The previous word. May be null after resetting and before starting a new composing word, or
     // when there is no context like at the start of text for example. It can also be set to null
     // externally when the user enters a separator that does not let bigrams across, like a period
@@ -32,7 +34,7 @@ public class PrevWordsInfo {

     // Beginning of sentence.
     public PrevWordsInfo() {
-        mPrevWord = null;
+        mPrevWord = "";
         mIsBeginningOfSentence = true;
     }

@@ -40,4 +42,8 @@ public class PrevWordsInfo {
         mPrevWord = prevWord;
         mIsBeginningOfSentence = false;
     }
+
+    public boolean isValid() {
+        return mPrevWord != null;
+    }
 }

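With the PrevWordsInfo change above, a beginning-of-sentence context now carries an empty (rather than null) mPrevWord plus the mIsBeginningOfSentence flag, so the new isValid() separates "no usable context" from "start of sentence". A small illustrative sketch of the distinction (not part of the commit; it assumes the existing single-argument constructor whose body appears in the hunk above):

    final PrevWordsInfo noContext = new PrevWordsInfo(null);        // e.g. after a period: mPrevWord == null
    final PrevWordsInfo bos = PrevWordsInfo.BEGINNING_OF_SENTENCE;  // mPrevWord == "", flag set
    final PrevWordsInfo ordinary = new PrevWordsInfo("hello");      // normal previous word

    // Only the null-context case is rejected by callers such as addNgramEntry()/getNgramProbability().
    assert !noContext.isValid();
    assert bos.isValid();
    assert ordinary.isValid();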
@@ -35,6 +35,8 @@ public final class WordProperty implements Comparable<WordProperty> {
     public final ProbabilityInfo mProbabilityInfo;
     public final ArrayList<WeightedString> mShortcutTargets;
     public final ArrayList<WeightedString> mBigrams;
+    // TODO: Support mIsBeginningOfSentence.
+    public final boolean mIsBeginningOfSentence;
     public final boolean mIsNotAWord;
     public final boolean mIsBlacklistEntry;
     public final boolean mHasShortcuts;
@@ -51,6 +53,7 @@ public final class WordProperty implements Comparable<WordProperty> {
         mProbabilityInfo = probabilityInfo;
         mShortcutTargets = shortcutTargets;
         mBigrams = bigrams;
+        mIsBeginningOfSentence = false;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklistEntry;
         mHasBigrams = bigrams != null && !bigrams.isEmpty();
@@ -77,6 +80,7 @@ public final class WordProperty implements Comparable<WordProperty> {
         mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
         mShortcutTargets = CollectionUtils.newArrayList();
         mBigrams = CollectionUtils.newArrayList();
+        mIsBeginningOfSentence = false;
         mIsNotAWord = isNotAWord;
         mIsBlacklistEntry = isBlacklisted;
         mHasShortcuts = hasShortcuts;

@@ -178,10 +178,10 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
         jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
         jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
         jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
-        jintArray prevWordCodePointsForBigrams, jintArray outSuggestionCount,
-        jintArray outCodePointsArray, jintArray outScoresArray, jintArray outSpaceIndicesArray,
-        jintArray outTypesArray, jintArray outAutoCommitFirstWordConfidenceArray,
-        jfloatArray inOutLanguageWeight) {
+        jintArray prevWordCodePointsForBigrams, jboolean isBeginningOfSentence,
+        jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
+        jintArray outSpaceIndicesArray, jintArray outTypesArray,
+        jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
     JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
@@ -274,7 +274,7 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
 }

 static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
-        jlong dict, jintArray word0, jintArray word1) {
+        jlong dict, jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) return JNI_FALSE;
     const jsize word0Length = env->GetArrayLength(word0);
@@ -283,7 +283,7 @@ static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass c
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, false /* isStartOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     return dictionary->getBigramProbability(&prevWordsInfo, word1CodePoints, word1Length);
 }

@@ -326,7 +326,8 @@ static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,

 static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz, jlong dict,
         jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
-        jboolean isNotAWord, jboolean isBlacklisted, jint timestamp) {
+        jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
+        jint timestamp) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -341,13 +342,14 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
     }
     // Use 1 for count to indicate the word has inputted.
-    const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
+    const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
             isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
     dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
 }

 static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
-        jintArray word0, jintArray word1, jint probability, jint timestamp) {
+        jintArray word0, jboolean isBeginningOfSentence, jintArray word1, jint probability,
+        jint timestamp) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -363,13 +365,12 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
     // Use 1 for count to indicate the bigram has inputted.
     const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
             timestamp, 0 /* level */, 1 /* count */);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
-            false /* isBeginningOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
 }

 static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
-        jintArray word0, jintArray word1) {
+        jintArray word0, jboolean isBeginningOfSentence, jintArray word1) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
     if (!dictionary) {
         return;
@@ -380,8 +381,7 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
     jsize word1Length = env->GetArrayLength(word1);
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
-            false /* isBeginningOfSentence */);
+    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, isBeginningOfSentence);
     dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
 }

@@ -625,7 +625,7 @@ static const JNINativeMethod sMethods[] = {
     },
     {
         const_cast<char *>("getSuggestionsNative"),
-        const_cast<char *>("(JJJ[I[I[I[I[II[I[I[I[I[I[I[I[I[F)V"),
+        const_cast<char *>("(JJJ[I[I[I[I[II[I[IZ[I[I[I[I[I[I[F)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
     },
     {
@@ -635,7 +635,7 @@ static const JNINativeMethod sMethods[] = {
     },
     {
         const_cast<char *>("getBigramProbabilityNative"),
-        const_cast<char *>("(J[I[I)I"),
+        const_cast<char *>("(J[IZ[I)I"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
     },
     {
@@ -651,17 +651,17 @@ static const JNINativeMethod sMethods[] = {
     },
     {
         const_cast<char *>("addUnigramWordNative"),
-        const_cast<char *>("(J[II[IIZZI)V"),
+        const_cast<char *>("(J[II[IIZZZI)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramWord)
     },
     {
         const_cast<char *>("addBigramWordsNative"),
-        const_cast<char *>("(J[I[III)V"),
+        const_cast<char *>("(J[IZ[III)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_addBigramWords)
     },
     {
         const_cast<char *>("removeBigramWordsNative"),
-        const_cast<char *>("(J[I[I)V"),
+        const_cast<char *>("(J[IZ[I)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
     },
     {

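For readers unfamiliar with JNI descriptor strings: the second string in each table entry above encodes the Java parameter and return types, so adding a boolean argument means inserting a 'Z' at the matching position. As an annotated sketch (the comments are editorial; the declarations themselves appear earlier in this diff), the updated descriptors map to the Java native declarations as follows:

    // (J[IZ[I)I   -> (long, int[], boolean, int[]) returning int
    private static native int getBigramProbabilityNative(long dict, int[] word0,
            boolean isBeginningOfSentence, int[] word1);

    // (J[IZ[III)V -> (long, int[], boolean, int[], int, int) returning void
    private static native void addBigramWordsNative(long dict, int[] word0,
            boolean isBeginningOfSentence, int[] word1, int probability, int timestamp);

    // (J[IZ[I)V   -> (long, int[], boolean, int[]) returning void
    private static native void removeBigramWordsNative(long dict, int[] word0,
            boolean isBeginningOfSentence, int[] word1);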
@@ -56,7 +56,7 @@ bool DynamicPtGcEventListeners
         }
     } else {
         mValueStack.back() += 1;
-        if (ptNodeParams->isTerminal()) {
+        if (ptNodeParams->isTerminal() && !ptNodeParams->representsNonWordInfo()) {
             mValidUnigramCount += 1;
         }
     }

@@ -63,12 +63,16 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
         super.tearDown();
     }

+    private static boolean supportsBeginningOfSentence(final int formatVersion) {
+        return formatVersion >= FormatSpec.VERSION4_DEV;
+    }
+
     private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
             final int probability) {
         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
-                false /* isNotAWord */, false /* isBlacklisted */,
-                mCurrentTime /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, mCurrentTime /* timestamp */);
     }

     private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -631,4 +635,57 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
         binaryDictionary.close();
         dictFile.delete();
     }
+
+    public void testBeginningOfSentence() {
+        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+            if (supportsBeginningOfSentence(formatVersion)) {
+                testBeginningOfSentence(formatVersion);
+            }
+        }
+    }
+
+    private void testBeginningOfSentence(final int formatVersion) {
+        setCurrentTimeForTestMode(mCurrentTime);
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+        binaryDictionary.addUnigramEntry("", DUMMY_PROBABILITY, "" /* shortcutTarget */,
+                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+                true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+                mCurrentTime);
+        final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+        addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+                mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+                mCurrentTime);
+        addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
+                mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+        assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+
+        forcePassingLongTime(binaryDictionary);
+        assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+        assertFalse(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+
+        addUnigramWord(binaryDictionary, "aaa", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", DUMMY_PROBABILITY,
+                mCurrentTime);
+        addUnigramWord(binaryDictionary, "bbb", DUMMY_PROBABILITY);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", DUMMY_PROBABILITY,
+                mCurrentTime);
+        assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "aaa"));
+        assertTrue(binaryDictionary.isValidNgram(prevWordsInfoStartOfSentence, "bbb"));
+        binaryDictionary.close();
+        dictFile.delete();
+    }
 }

@@ -50,6 +50,10 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         return formatVersion >= FormatSpec.VERSION4_DEV;
     }

+    private static boolean supportsBeginningOfSentence(final int formatVersion) {
+        return formatVersion >= FormatSpec.VERSION4_DEV;
+    }
+
     private File createEmptyDictionaryAndGetFile(final String dictId,
             final int formatVersion) throws IOException {
         if (formatVersion == FormatSpec.VERSION4
@@ -171,7 +175,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         addUnigramWord(binaryDictionary, invalidLongWord, probability);
         // Too long short cut.
         binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
-                10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
+                10 /* shortcutProbability */, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */,
                 BinaryDictionary.NOT_A_VALID_TIMESTAMP);
         addUnigramWord(binaryDictionary, "abc", probability);
         final int updatedProbability = 200;
@@ -192,8 +197,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
             final int probability) {
         binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
                 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
-                false /* isNotAWord */, false /* isBlacklisted */,
-                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
     }

     private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
@@ -1010,7 +1015,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         // TODO: Add tests for historical info.
         binaryDictionary.addUnigramEntry(word, unigramProbability,
                 null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
-                isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
+                false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
+                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
         if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
             binaryDictionary.flushWithGC();
         }
@@ -1188,24 +1194,24 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         final int unigramProbability = 100;
         final int shortcutProbability = 10;
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
-                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                shortcutProbability, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
         WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
         assertEquals(1, wordProperty.mShortcutTargets.size());
         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
         assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
         final int updatedShortcutProbability = 2;
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
-                updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                updatedShortcutProbability, false /* isBeginningOfSentence */,
+                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
         wordProperty = binaryDictionary.getWordProperty("aaa");
         assertEquals(1, wordProperty.mShortcutTargets.size());
         assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
         assertEquals(updatedShortcutProbability,
                 wordProperty.mShortcutTargets.get(0).getProbability());
         binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
-                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                0 /* timestamp */);
+                shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, 0 /* timestamp */);
         final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
         shortcutTargets.put("zzz", updatedShortcutProbability);
         shortcutTargets.put("yyy", shortcutProbability);
@@ -1275,8 +1281,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
             final String word = words.get(random.nextInt(words.size()));
             final int unigramProbability = unigramProbabilities.get(word);
             binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
-                    shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
-                    0 /* timestamp */);
+                    shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
+                    false /* isBlacklisted */, 0 /* timestamp */);
             if (shortcutTargets.containsKey(word)) {
                 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
                 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
@@ -1331,10 +1337,11 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
         final int shortcutProbability = 10;
         binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
-                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
+                false /* isBeginningOfSentence */, false /* isNotAWord */,
+                false /* isBlacklisted */, 0 /* timestamp */);
         binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
-                Dictionary.NOT_A_PROBABILITY, true /* isNotAWord */,
-                true /* isBlacklisted */, 0 /* timestamp */);
+                Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
+                true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
         assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
         assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
         assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
@@ -1434,4 +1441,46 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         assertEquals(bigramProbabilities.size(), Integer.parseInt(
                 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
     }
+
+    public void testBeginningOfSentence() {
+        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
+            if (supportsBeginningOfSentence(formatVersion)) {
+                testBeginningOfSentence(formatVersion);
+            }
+        }
+    }
+
+    private void testBeginningOfSentence(final int formatVersion) {
+        File dictFile = null;
+        try {
+            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
+        } catch (IOException e) {
+            fail("IOException while writing an initial dictionary : " + e);
+        }
+        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+        final int dummyProbability = 0;
+        binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */,
+                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
+                true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
+                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+        final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
+        final int bigramProbability = 200;
+        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
+                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+        assertEquals(bigramProbability,
+                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
+                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
+        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability,
+                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
+        binaryDictionary.flushWithGC();
+        assertEquals(bigramProbability,
+                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
+        assertEquals(bigramProbability,
+                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
+    }
 }

@@ -77,14 +77,14 @@ public class Ver4DictEncoder implements DictEncoder {
         if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
             binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
                     null /* shortcutTarget */, 0 /* shortcutProbability */,
-                    wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
-                    0 /* timestamp */);
+                    wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+                    wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
         } else {
             for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                 binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
                         shortcutTarget.mWord, shortcutTarget.getProbability(),
-                        wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
-                        0 /* timestamp */);
+                        wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
+                        wordProperty.mIsBlacklistEntry, 0 /* timestamp */);
             }
         }
         if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {