From 522a04ea5b249d0af556647d2abcad57e5b99b4f Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 23 Apr 2012 15:37:07 +0900 Subject: [PATCH] Pass words as int[] to the native code. We need to get the bigrams during the call to getSuggestions for bug#6313806. We already give an int[] to getSuggestions and we wanted to get rid of char[]'s anyway because it doesn't work with surrogate pairs, so here we go. Bug: 6313806 Change-Id: I56ce99f1db6b3302cdf42f0527343bded837091e --- .../inputmethod/latin/BinaryDictionary.java | 12 +++++------ ...oid_inputmethod_latin_BinaryDictionary.cpp | 20 +++++++++---------- native/jni/src/bigram_dictionary.cpp | 4 ++-- native/jni/src/bigram_dictionary.h | 4 ++-- native/jni/src/binary_format.h | 6 +++--- native/jni/src/dictionary.cpp | 2 +- native/jni/src/dictionary.h | 4 ++-- native/jni/src/unigram_dictionary.cpp | 2 +- native/jni/src/unigram_dictionary.h | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 9429ef411..a644ec0d9 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -83,11 +83,11 @@ public class BinaryDictionary extends Dictionary { private native long openNative(String sourceDir, long dictOffset, long dictSize, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); private native void closeNative(long dict); - private native boolean isValidWordNative(long dict, char[] word, int wordLength); + private native boolean isValidWordNative(long dict, int[] word, int wordLength); private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, boolean useFullEditDistance, char[] outputChars, int[] scores); - private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength, + private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength, int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, int maxWordLength, int maxBigrams); private static native double calcNormalizedScoreNative( @@ -105,7 +105,7 @@ public class BinaryDictionary extends Dictionary { final WordCallback callback) { if (mNativeDict == 0) return; - char[] chars = previousWord.toString().toCharArray(); + int[] codePoints = StringUtils.toCodePointArray(previousWord.toString()); Arrays.fill(mOutputChars_bigrams, (char) 0); Arrays.fill(mBigramScores, 0); @@ -115,8 +115,8 @@ public class BinaryDictionary extends Dictionary { mInputCodes[0] = codes.getCodeAt(0); } - int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize, - mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); + int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes, + codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); if (count > MAX_BIGRAMS) { count = MAX_BIGRAMS; } @@ -200,7 +200,7 @@ public class BinaryDictionary extends Dictionary { @Override public boolean isValidWord(CharSequence word) { if (word == null) return false; - char[] chars = word.toString().toCharArray(); + int[] chars = StringUtils.toCodePointArray(word.toString()); return isValidWordNative(mNativeDict, chars, chars.length); } diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 2ef72e1e8..3e72ce684 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -153,30 +153,30 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, } static int latinime_BinaryDictionary_getBigrams(JNIEnv *env, jobject object, jlong dict, - jcharArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, + jintArray prevWordArray, jint prevWordLength, jintArray inputArray, jint inputArraySize, jcharArray outputArray, jintArray frequencyArray, jint maxWordLength, jint maxBigrams) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return 0; - jchar *prevWord = env->GetCharArrayElements(prevWordArray, 0); + jint *prevWord = env->GetIntArrayElements(prevWordArray, 0); int *inputCodes = env->GetIntArrayElements(inputArray, 0); jchar *outputChars = env->GetCharArrayElements(outputArray, 0); int *frequencies = env->GetIntArrayElements(frequencyArray, 0); - int count = dictionary->getBigrams((unsigned short*) prevWord, prevWordLength, inputCodes, + int count = dictionary->getBigrams(prevWord, prevWordLength, inputCodes, inputArraySize, (unsigned short*) outputChars, frequencies, maxWordLength, maxBigrams); env->ReleaseIntArrayElements(frequencyArray, frequencies, 0); env->ReleaseCharArrayElements(outputArray, outputChars, 0); env->ReleaseIntArrayElements(inputArray, inputCodes, JNI_ABORT); - env->ReleaseCharArrayElements(prevWordArray, prevWord, JNI_ABORT); + env->ReleaseIntArrayElements(prevWordArray, prevWord, JNI_ABORT); return count; } static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject object, jlong dict, - jcharArray wordArray, jint wordLength) { + jintArray wordArray, jint wordLength) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return (jboolean) false; - jchar *word = env->GetCharArrayElements(wordArray, 0); - jboolean result = dictionary->isValidWord((unsigned short*) word, wordLength); - env->ReleaseCharArrayElements(wordArray, word, JNI_ABORT); + jint *word = env->GetIntArrayElements(wordArray, 0); + jboolean result = dictionary->isValidWord(word, wordLength); + env->ReleaseIntArrayElements(wordArray, word, JNI_ABORT); return result; } @@ -236,8 +236,8 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[II[IZ[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, - {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, - {"getBigramsNative", "(J[CI[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, + {"isValidWordNative", "(J[II)Z", (void*)latinime_BinaryDictionary_isValidWord}, + {"getBigramsNative", "(J[II[II[C[III)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)D", (void*)latinime_BinaryDictionary_calcNormalizedScore}, {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance} diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index 320b0af68..927381fdb 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -96,7 +96,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int *codes, +int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *codes, int codesSize, unsigned short *bigramChars, int *bigramFreq, int maxWordLength, int maxBigrams) { // TODO: remove unused arguments, and refrain from storing stuff in members of this class @@ -134,7 +134,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i // Returns a pointer to the start of the bigram list. // If the word is not found or has no bigrams, this function returns 0. int BigramDictionary::getBigramListForWord(const uint8_t* const root, - const unsigned short *prevWord, const int prevWordLength) { + const int32_t *prevWord, const int prevWordLength) { int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength); if (NOT_VALID_WORD == pos) return 0; diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 1612131c4..07e47f059 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -25,10 +25,10 @@ class Dictionary; class BigramDictionary { public: BigramDictionary(const unsigned char *dict, int maxWordLength, Dictionary *parentDictionary); - int getBigrams(unsigned short *word, int length, int *codes, int codesSize, + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams); int getBigramListForWord(const uint8_t* const root, - const unsigned short *prevWord, const int prevWordLength); + const int32_t *prevWord, const int prevWordLength); ~BigramDictionary(); private: bool addWordBigram(unsigned short *word, int length, int frequency); diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index f59302460..b8ac95250 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -62,7 +62,7 @@ class BinaryFormat { static bool hasChildrenInFlags(const uint8_t flags); static int getAttributeAddressAndForwardPointer(const uint8_t* const dict, const uint8_t flags, int *pos); - static int getTerminalPosition(const uint8_t* const root, const uint16_t* const inWord, + static int getTerminalPosition(const uint8_t* const root, const int32_t* const inWord, const int length); static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth, uint16_t* outWord); @@ -304,7 +304,7 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t* con // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, - const uint16_t* const inWord, const int length) { + const int32_t* const inWord, const int length) { int pos = 0; int wordPos = 0; @@ -313,7 +313,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t* const root, // there was no match (or we would have found it). if (wordPos > length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const uint16_t wChar = inWord[wordPos]; + const int32_t wChar = inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 90ec207f0..9dc207223 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -54,7 +54,7 @@ Dictionary::~Dictionary() { delete mBigramDictionary; } -bool Dictionary::isValidWord(unsigned short *word, int length) { +bool Dictionary::isValidWord(const int32_t *word, int length) { return mUnigramDictionary->isValidWord(word, length); } diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 66a5c2150..dea5763d0 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -40,13 +40,13 @@ class Dictionary { codesSize, useFullEditDistance, outWords, frequencies); } - int getBigrams(unsigned short *word, int length, int *codes, int codesSize, + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams) { return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, maxWordLength, maxBigrams); } - bool isValidWord(unsigned short *word, int length); + bool isValidWord(const int32_t *word, int length); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index ab8570e6f..05c124b94 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -730,7 +730,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor return maxFreq; } -bool UnigramDictionary::isValidWord(const uint16_t* const inWord, const int length) const { +bool UnigramDictionary::isValidWord(const int32_t* const inWord, const int length) const { return NOT_VALID_WORD != BinaryFormat::getTerminalPosition(DICT_ROOT, inWord, length); } diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 4479cd94e..b09c0006d 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -71,7 +71,7 @@ class UnigramDictionary { UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - bool isValidWord(const uint16_t* const inWord, const int length) const; + bool isValidWord(const int32_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,