diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index 709b0a16e..207e8aa77 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -109,7 +109,7 @@ public final class BinaryDictionary extends Dictionary {
     private static native void flushWithGCNative(long dict, String filePath);
     private static native void closeNative(long dict);
     private static native int getProbabilityNative(long dict, int[] word);
-    private static native boolean isValidBigramNative(long dict, int[] word0, int[] word1);
+    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
     private static native int getSuggestionsNative(long dict, long proximityInfo,
             long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
             int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
@@ -122,6 +122,8 @@ public final class BinaryDictionary extends Dictionary {
     private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
             int probability);
     private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
+    private static native int calculateProbabilityNative(long dict, int unigramProbability,
+            int bigramProbability);
 
     // TODO: Move native dict into session
     private final void loadDictionary(final String path, final long startOffset,
@@ -219,12 +221,12 @@ public final class BinaryDictionary extends Dictionary {
 
     @Override
     public boolean isValidWord(final String word) {
-        return getFrequency(word) >= 0;
+        return getFrequency(word) != NOT_A_PROBABILITY;
     }
 
     @Override
     public int getFrequency(final String word) {
-        if (word == null) return -1;
+        if (word == null) return NOT_A_PROBABILITY;
         int[] codePoints = StringUtils.toCodePointArray(word);
         return getProbabilityNative(mNativeDict, codePoints);
     }
@@ -232,10 +234,22 @@ public final class BinaryDictionary extends Dictionary {
     // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
     // calls when checking for changes in an entire dictionary.
     public boolean isValidBigram(final String word0, final String word1) {
-        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return false;
+        return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
+    }
+
+    /**
+     * Returns the probability of {@code word1} following {@code word0}, as computed by the
+     * native dictionary.
+     *
+     * @param word0 the first word of the bigram.
+     * @param word1 the second word of the bigram.
+     * @return the native result, or {@code NOT_A_PROBABILITY} if either word is null or empty.
+     */
+    public int getBigramProbability(final String word0, final String word1) {
+        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
         final int[] codePoints0 = StringUtils.toCodePointArray(word0);
         final int[] codePoints1 = StringUtils.toCodePointArray(word1);
-        return isValidBigramNative(mNativeDict, codePoints0, codePoints1);
+        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
     }
 
     // Add a unigram entry to binary dictionary in native code.
@@ -285,6 +299,21 @@ public final class BinaryDictionary extends Dictionary {
         return needsToRunGCNative(mNativeDict);
     }
 
+    /**
+     * Asks the native dictionary to combine a unigram and a bigram probability, so that tests
+     * can compute the value they expect from {@link #getBigramProbability}.
+     *
+     * @param unigramProbability the unigram probability to combine.
+     * @param bigramProbability the bigram probability to combine.
+     * @return the combined probability, or {@code NOT_A_PROBABILITY} if
+     *         {@link #isValidDictionary()} is false.
+     */
+    @UsedForTesting
+    public int calculateProbability(final int unigramProbability, final int bigramProbability) {
+        if (!isValidDictionary()) return NOT_A_PROBABILITY;
+        return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
+    }
+
     @Override
     public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
         // TODO: actually use the confidence rather than use this completely broken heuristic
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index a63fab6dc..7f47493b2 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -188,8 +188,9 @@ static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz,
     return dictionary->getProbability(codePoints, wordLength);
 }
 
-static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
-        jintArray word0, jintArray word1) {
+static jint latinime_BinaryDictionary_getBigramProbability(JNIEnv *env, jclass clazz,
+        jlong dict, jintArray word0, jintArray word1) {
     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
-    if (!dictionary) return JNI_FALSE;
+    // JNI_FALSE is 0, which the Java side would read as a valid probability.
+    if (!dictionary) return NOT_A_PROBABILITY;
     const jsize word0Length = env->GetArrayLength(word0);
@@ -198,7 +199,8 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass claz
     int word1CodePoints[word1Length];
     env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
     env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
-    return dictionary->isValidBigram(word0CodePoints, word0Length, word1CodePoints, word1Length);
+    return dictionary->getBigramProbability(word0CodePoints, word0Length, word1CodePoints,
+            word1Length);
 }
 
 static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jclass clazz,
@@ -269,6 +271,16 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
             word1Length);
 }
 
+static jint latinime_BinaryDictionary_calculateProbabilityNative(JNIEnv *env, jclass clazz,
+        jlong dict, jint unigramProbability, jint bigramProbability) {
+    Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+    if (!dictionary) {
+        return NOT_A_PROBABILITY;
+    }
+    return dictionary->getDictionaryStructurePolicy()->getProbability(unigramProbability,
+            bigramProbability);
+}
+
 static const JNINativeMethod sMethods[] = {
     {
         const_cast<char *>("openNative"),
@@ -306,9 +318,9 @@ static const JNINativeMethod sMethods[] = {
         reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
     },
     {
-        const_cast<char *>("isValidBigramNative"),
-        const_cast<char *>("(J[I[I)Z"),
-        reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)
+        const_cast<char *>("getBigramProbabilityNative"),
+        const_cast<char *>("(J[I[I)I"),
+        reinterpret_cast<void *>(latinime_BinaryDictionary_getBigramProbability)
     },
     {
         const_cast<char *>("calcNormalizedScoreNative"),
@@ -334,6 +346,11 @@ static const JNINativeMethod sMethods[] = {
         const_cast<char *>("removeBigramWordsNative"),
         const_cast<char *>("(J[I[I)V"),
         reinterpret_cast<void *>(latinime_BinaryDictionary_removeBigramWords)
+    },
+    {
+        const_cast<char *>("calculateProbabilityNative"),
+        const_cast<char *>("(JII)I"),
+        reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
     }
 };
 
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
index 425b07624..5ba71c168 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp
@@ -150,24 +150,26 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
     return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
 }
 
-bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
+int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
         int length1) const {
     int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
     // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
-    if (NOT_A_DICT_POS == pos) return false;
+    if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
     int nextWordPos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(word1, length1,
             false /* forceLowerCaseSearch */);
-    if (NOT_A_DICT_POS == nextWordPos) return false;
+    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
 
     BinaryDictionaryBigramsIterator bigramsIt(
             mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
     while (bigramsIt.hasNext()) {
         bigramsIt.next();
         if (bigramsIt.getBigramPos() == nextWordPos) {
-            return true;
+            return mDictionaryStructurePolicy->getProbability(
+                    mDictionaryStructurePolicy->getUnigramProbabilityOfPtNode(nextWordPos),
+                    bigramsIt.getProbability());
         }
     }
-    return false;
+    return NOT_A_PROBABILITY;
 }
 
 // TODO: Move functions related to bigram to here
diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
index 99b964c49..8af7ee75d 100644
--- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.h
@@ -29,7 +29,7 @@ class BigramDictionary {
 
     int getPredictions(const int *word, int length, int *outBigramCodePoints,
             int *outBigramProbability, int *outputTypes) const;
-    bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
+    int getBigramProbability(const int *word1, int length1, const int *word2, int length2) const;
     ~BigramDictionary();
 
  private:
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index 033572201..ec1b63a12 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -93,8 +93,9 @@ int Dictionary::getProbability(const int *word, int length) const {
     return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
 }
 
-bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
-    return mBigramDictionary->isValidBigram(word0, length0, word1, length1);
+int Dictionary::getBigramProbability(const int *word0, int length0, const int *word1,
+        int length1) const {
+    return mBigramDictionary->getBigramProbability(word0, length0, word1, length1);
 }
 
 void Dictionary::addUnigramWord(const int *const word, const int length, const int probability) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 06e84bbfe..974447468 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -67,7 +67,7 @@ class Dictionary {
 
     int getProbability(const int *word, int length) const;
 
-    bool isValidBigram(const int *word0, int length0, const int *word1, int length1) const;
+    int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
 
     void addUnigramWord(const int *const word, const int length, const int probability);
 
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 4d231cde7..739aedcf5 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -151,7 +151,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
         for (int i = 0; i < wordCount; ++i) {
             final String word = CodePointUtils.generateWord(random, codePointSet);
-            probabilityMap.put(word, random.nextInt() & 0xFF);
+            probabilityMap.put(word, random.nextInt(0xFF));
         }
         for (String word : probabilityMap.keySet()) {
             binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
@@ -163,8 +163,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
     }
 
     public void testAddBigramWords() {
-        // TODO: Add a test to check the frequency of the bigram score which uses current value
-        // calculated in the native code
         File dictFile = null;
         try {
             dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
@@ -179,6 +177,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
 
         final int unigramProbability = 100;
         final int bigramProbability = 10;
+        final int updatedBigramProbability = 15;
         binaryDictionary.addUnigramWord("aaa", unigramProbability);
         binaryDictionary.addUnigramWord("abb", unigramProbability);
         binaryDictionary.addUnigramWord("bcc", unigramProbability);
@@ -187,21 +186,49 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
         binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
 
+        final int probability = binaryDictionary.calculateProbability(unigramProbability,
+                bigramProbability);
         assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
         assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
         assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
         assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
+        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
+        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
+        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
+        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
+
+        binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
+        final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
+                updatedBigramProbability);
+        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
 
         assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
         assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
         assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
+        assertEquals(Dictionary.NOT_A_PROBABILITY,
+                binaryDictionary.getBigramProbability("bcc", "aaa"));
+        assertEquals(Dictionary.NOT_A_PROBABILITY,
+                binaryDictionary.getBigramProbability("bcc", "bbc"));
+        assertEquals(Dictionary.NOT_A_PROBABILITY,
+                binaryDictionary.getBigramProbability("aaa", "aaa"));
+
+        // Testing bigram link.
+        binaryDictionary.addUnigramWord("abcde", unigramProbability);
+        binaryDictionary.addUnigramWord("fghij", unigramProbability);
+        binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
+        binaryDictionary.addUnigramWord("fgh", unigramProbability);
+        binaryDictionary.addUnigramWord("abc", unigramProbability);
+        binaryDictionary.addUnigramWord("f", unigramProbability);
+        assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
+        assertEquals(Dictionary.NOT_A_PROBABILITY,
+                binaryDictionary.getBigramProbability("abcde", "fgh"));
+        binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
+        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
 
         dictFile.delete();
     }
 
     public void testRandomlyAddBigramWords() {
-        // TODO: Add a test to check the frequency of the bigram score which uses current value
-        // calculated in the native code
         final int wordCount = 100;
         final int bigramCount = 1000;
         final int codePointSetSize = 50;
@@ -222,29 +249,38 @@ public class BinaryDictionaryTests extends AndroidTestCase {
         // Test a word that isn't contained within the dictionary.
         final Random random = new Random(seed);
         final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
-        final int unigramProbability = 100;
-        final int bigramProbability = 10;
+        final int[] unigramProbabilities = new int[wordCount];
         for (int i = 0; i < wordCount; ++i) {
             final String word = CodePointUtils.generateWord(random, codePointSet);
             words.add(word);
+            final int unigramProbability = random.nextInt(0xFF);
+            unigramProbabilities[i] = unigramProbability;
             binaryDictionary.addUnigramWord(word, unigramProbability);
         }
 
-        final boolean[][] bigramRelations = new boolean[wordCount][wordCount];
+        final int[][] probabilities = new int[wordCount][wordCount];
+
+        for (int i = 0; i < wordCount; ++i) {
+            for (int j = 0; j < wordCount; ++j) {
+                probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
+            }
+        }
+
         for (int i = 0; i < bigramCount; i++) {
             final int word0Index = random.nextInt(wordCount);
             final int word1Index = random.nextInt(wordCount);
             final String word0 = words.get(word0Index);
             final String word1 = words.get(word1Index);
-
-            bigramRelations[word0Index][word1Index] = true;
+            final int bigramProbability = random.nextInt(0xF);
+            probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
+                    unigramProbabilities[word1Index], bigramProbability);
             binaryDictionary.addBigramWords(word0, word1, bigramProbability);
         }
 
         for (int i = 0; i < words.size(); i++) {
             for (int j = 0; j < words.size(); j++) {
-                assertEquals(bigramRelations[i][j],
-                        binaryDictionary.isValidBigram(words.get(i), words.get(j)));
+                assertEquals(probabilities[i][j],
+                        binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
             }
         }
 