From 2f854e170c9fde47cae804145f90d164cdb5ceb8 Mon Sep 17 00:00:00 2001 From: Satoshi Kataoka Date: Tue, 29 May 2012 15:58:13 +0900 Subject: [PATCH] Add a JNI to get the frequency Bug: 4192129 Change-Id: I3f220f5a10114d4eb23956148076cf76220bda0f --- .../inputmethod/latin/BinaryDictionary.java | 5 +++-- ...droid_inputmethod_latin_BinaryDictionary.cpp | 6 +++--- native/jni/src/dictionary.cpp | 4 ++-- native/jni/src/dictionary.h | 2 +- native/jni/src/unigram_dictionary.cpp | 17 +++++++++++++++-- native/jni/src/unigram_dictionary.h | 2 +- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index e18aee6ff..cb1069cfb 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -84,7 +84,7 @@ public class BinaryDictionary extends Dictionary { private native long openNative(String sourceDir, long dictOffset, long dictSize, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); private native void closeNative(long dict); - private native boolean isValidWordNative(long dict, int[] word, int wordLength); + private native int getFrequencyNative(long dict, int[] word, int wordLength); private native boolean isValidBigramNative(long dict, int[] word1, int[] word2); private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, @@ -203,7 +203,8 @@ public class BinaryDictionary extends Dictionary { public boolean isValidWord(CharSequence word) { if (word == null) return false; int[] chars = StringUtils.toCodePointArray(word.toString()); - return isValidWordNative(mNativeDict, chars, chars.length); + final int freq = getFrequencyNative(mNativeDict, chars, chars.length); + return freq >= 0; } // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index f130062a1..d10dc962e 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -173,12 +173,12 @@ static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlo return count; } -static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict, +static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jobject object, jlong dict, jintArray wordArray, jint wordLength) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return (jboolean) false; jint *word = env->GetIntArrayElements(wordArray, 0); - jboolean result = dictionary->isValidWord(word, wordLength); + jint result = dictionary->getFrequency(word, wordLength); env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT); return result; } @@ -253,7 +253,7 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, - {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"getFrequencyNative", "(J[II)I", (void*)latinime_BinaryDictionary_getFrequency}, {"isValidBigramNative", "(J[I[I)Z", (void*)latinime_BinaryDictionary_isValidBigram}, {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)F", diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 65d0f73a3..1fb02478b 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -55,8 +55,8 @@ Dictionary::~Dictionary() { delete mBigramDictionary; } -bool Dictionary::isValidWord(const int32_t *word, int length) { - return mUnigramDictionary->isValidWord(word, length); +int Dictionary::getFrequency(const int32_t *word, int length) { + return mUnigramDictionary->getFrequency(word, length); } bool Dictionary::isValidBigram(const int32_t *word1, int length1, const int32_t *word2, diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 87891ee4d..9f2367904 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -52,7 +52,7 @@ class Dictionary { maxWordLength, maxBigrams); } - bool isValidWord(const int32_t *word, int length); + int getFrequency(const int32_t *word, int length); bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 828582848..efe9c4fe3 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -747,8 +747,21 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const { - return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length); +int UnigramDictionary::getFrequency(const int32_t* const inWord, const int length) const { + const uint8_t* const root = DICT_ROOT; + int pos = BinaryFormat::getTerminalPosition(root, inWord, length); + if (NOT_VALID_WORD == pos) { + return NOT_A_PROBABILITY; + } + const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + const bool hasMultipleChars = (0 != (FLAG_HAS_MULTIPLE_CHARS & flags)); + if (hasMultipleChars) { + pos = BinaryFormat::skipOtherCharacters(root, pos); + } else { + BinaryFormat::getCharCodeAndForwardPointer(DICT_ROOT, &pos); + } + const int unigramFreq = BinaryFormat::readFrequencyWithoutMovingPointer(root, pos); + return unigramFreq; } // TODO: remove this function. diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index b9233518f..b70894004 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -72,7 +72,7 @@ class UnigramDictionary { UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - bool isValidWord(const int32_t* const inWord, const int length) const; + int getFrequency(const int32_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates,