From 1e61493c50082264caaef862df02b1ccc84dc396 Mon Sep 17 00:00:00 2001 From: Ken Wakasa Date: Mon, 29 Oct 2012 18:06:22 +0900 Subject: [PATCH] Use 32-bit code points for suggestions output This is a multi-project commit with Ic43dd666 bug: 6526418 Change-Id: I39c1acb4e91d04cd8a4ec5a943c8cf575da75ebc --- .../inputmethod/latin/BinaryDictionary.java | 36 +++++---- ...oid_inputmethod_latin_BinaryDictionary.cpp | 56 ++++++------- native/jni/src/bigram_dictionary.cpp | 31 +++---- native/jni/src/bigram_dictionary.h | 16 ++-- native/jni/src/binary_format.h | 35 ++++---- native/jni/src/char_utils.h | 30 +++---- native/jni/src/correction.cpp | 75 ++++++++--------- native/jni/src/correction.h | 45 +++++------ native/jni/src/defines.h | 43 ++++------ native/jni/src/dictionary.cpp | 11 ++- native/jni/src/dictionary.h | 12 +-- .../jni/src/gesture/gesture_decoder_wrapper.h | 11 ++- .../gesture/incremental_decoder_interface.h | 7 +- .../src/gesture/incremental_decoder_wrapper.h | 11 ++- native/jni/src/proximity_info_state.cpp | 41 +++++----- native/jni/src/proximity_info_state.h | 61 +++++++------- native/jni/src/terminal_attributes.h | 10 +-- native/jni/src/unigram_dictionary.cpp | 80 +++++++++---------- native/jni/src/unigram_dictionary.h | 23 +++--- native/jni/src/words_priority_queue.h | 58 +++++++------- 20 files changed, 329 insertions(+), 363 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 80af4b9fa..a7024d1d8 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -51,8 +51,7 @@ public final class BinaryDictionary extends Dictionary { private long mNativeDict; private final Locale mLocale; private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH]; - // TODO: The below should be int[] mOutputCodePoints - private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_RESULTS]; + private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS]; private final int[] mSpaceIndices = new int[MAX_SPACES]; private final int[] mOutputScores = new int[MAX_RESULTS]; private final int[] mOutputTypes = new int[MAX_RESULTS]; @@ -88,9 +87,9 @@ public final class BinaryDictionary extends Dictionary { * @param useFullEditDistance whether to use the full edit distance in suggestions * @param dictType the dictionary type, as a human-readable string */ - public BinaryDictionary(final Context context, - final String filename, final long offset, final long length, - final boolean useFullEditDistance, final Locale locale, final String dictType) { + public BinaryDictionary(final Context context, final String filename, final long offset, + final long length, final boolean useFullEditDistance, final Locale locale, + final String dictType) { super(dictType); mLocale = locale; mUseFullEditDistance = useFullEditDistance; @@ -109,10 +108,10 @@ public final class BinaryDictionary extends Dictionary { private native int getSuggestionsNative(long dict, long proximityInfo, long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodePoints, int codesSize, int commitPoint, boolean isGesture, - int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars, + int[] prevWordCodePointArray, boolean useFullEditDistance, int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes); - private static native float calcNormalizedScoreNative(char[] before, char[] after, int score); - private static native int editDistanceNative(char[] before, char[] after); + private static native float calcNormalizedScoreNative(int[] before, int[] after, int score); + private static native int editDistanceNative(int[] before, int[] after); // TODO: Move native dict into session private final void loadDictionary(final String path, final long startOffset, @@ -153,7 +152,8 @@ public final class BinaryDictionary extends Dictionary { proximityInfo.getNativeProximityInfo(), getTraverseSession(sessionId).getSession(), ips.getXCoordinates(), ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints, codesSize, 0 /* commitPoint */, isGesture, prevWordCodePointArray, - mUseFullEditDistance, mOutputChars, mOutputScores, mSpaceIndices, mOutputTypes); + mUseFullEditDistance, mOutputCodePoints, mOutputScores, mSpaceIndices, + mOutputTypes); final int count = Math.min(tmpCount, MAX_PREDICTIONS); final ArrayList suggestions = CollectionUtils.newArrayList(); @@ -161,14 +161,14 @@ public final class BinaryDictionary extends Dictionary { if (composerSize > 0 && mOutputScores[j] < 1) break; final int start = j * MAX_WORD_LENGTH; int len = 0; - while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) { + while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) { ++len; } if (len > 0) { final int score = SuggestedWordInfo.KIND_WHITELIST == mOutputTypes[j] ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j]; - suggestions.add(new SuggestedWordInfo( - new String(mOutputChars, start, len), score, mOutputTypes[j], mDictType)); + suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len), + score, mOutputTypes[j], mDictType)); } } return suggestions; @@ -180,14 +180,16 @@ public final class BinaryDictionary extends Dictionary { public static float calcNormalizedScore(final String before, final String after, final int score) { - return calcNormalizedScoreNative(before.toCharArray(), after.toCharArray(), score); + return calcNormalizedScoreNative(StringUtils.toCodePointArray(before), + StringUtils.toCodePointArray(after), score); } public static int editDistance(final String before, final String after) { if (before == null || after == null) { throw new IllegalArgumentException(); } - return editDistanceNative(before.toCharArray(), after.toCharArray()); + return editDistanceNative(StringUtils.toCodePointArray(before), + StringUtils.toCodePointArray(after)); } @Override @@ -206,9 +208,9 @@ public final class BinaryDictionary extends Dictionary { // calls when checking for changes in an entire dictionary. public boolean isValidBigram(final String word1, final String word2) { if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; - final int[] chars1 = StringUtils.toCodePointArray(word1); - final int[] chars2 = StringUtils.toCodePointArray(word2); - return isValidBigramNative(mNativeDict, chars1, chars2); + final int[] codePoints1 = StringUtils.toCodePointArray(word1); + final int[] codePoints2 = StringUtils.toCodePointArray(word2); + return isValidBigramNative(mNativeDict, codePoints1, codePoints2); } @Override diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index 42f7da9d3..5b8d1119d 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -132,7 +132,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, jintArray inputCodePointsArray, jint arraySize, jint commitPoint, jboolean isGesture, jintArray prevWordCodePointsForBigrams, jboolean useFullEditDistance, - jcharArray outputCharsArray, jintArray scoresArray, jintArray spaceIndicesArray, + jintArray outputCodePointsArray, jintArray scoresArray, jintArray spaceIndicesArray, jintArray outputTypesArray) { Dictionary *dictionary = reinterpret_cast(dict); if (!dictionary) return 0; @@ -162,16 +162,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, } // Output values - // TODO: Should be "outputCodePointsLength" and "int outputCodePoints[]" - const jsize outputCharsLength = env->GetArrayLength(outputCharsArray); - unsigned short outputChars[outputCharsLength]; + const jsize outputCodePointsLength = env->GetArrayLength(outputCodePointsArray); + int outputCodePoints[outputCodePointsLength]; const jsize scoresLength = env->GetArrayLength(scoresArray); int scores[scoresLength]; const jsize spaceIndicesLength = env->GetArrayLength(spaceIndicesArray); int spaceIndices[spaceIndicesLength]; const jsize outputTypesLength = env->GetArrayLength(outputTypesArray); int outputTypes[outputTypesLength]; - memset(outputChars, 0, sizeof(outputChars)); + memset(outputCodePoints, 0, sizeof(outputCodePoints)); memset(scores, 0, sizeof(scores)); memset(spaceIndices, 0, sizeof(spaceIndices)); memset(outputTypes, 0, sizeof(outputTypes)); @@ -180,16 +179,15 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jobject object, if (isGesture || arraySize > 0) { count = dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, times, pointerIds, inputCodePoints, arraySize, prevWordCodePoints, - prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, outputChars, - scores, spaceIndices, outputTypes); + prevWordCodePointsLength, commitPoint, isGesture, useFullEditDistance, + outputCodePoints, scores, spaceIndices, outputTypes); } else { count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, - inputCodePoints, arraySize, outputChars, scores, outputTypes); + inputCodePoints, arraySize, outputCodePoints, scores, outputTypes); } // Copy back the output values - // TODO: Should be SetIntArrayRegion() - env->SetCharArrayRegion(outputCharsArray, 0, outputCharsLength, outputChars); + env->SetIntArrayRegion(outputCodePointsArray, 0, outputCodePointsLength, outputCodePoints); env->SetIntArrayRegion(scoresArray, 0, scoresLength, scores); env->SetIntArrayRegion(spaceIndicesArray, 0, spaceIndicesLength, spaceIndices); env->SetIntArrayRegion(outputTypesArray, 0, outputTypesLength, outputTypes); @@ -221,29 +219,27 @@ static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jobject obj } static jfloat latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, - jcharArray before, jcharArray after, jint score) { + jintArray before, jintArray after, jint score) { jsize beforeLength = env->GetArrayLength(before); jsize afterLength = env->GetArrayLength(after); - jchar beforeChars[beforeLength]; - jchar afterChars[afterLength]; - env->GetCharArrayRegion(before, 0, beforeLength, beforeChars); - env->GetCharArrayRegion(after, 0, afterLength, afterChars); - return Correction::RankingAlgorithm::calcNormalizedScore( - static_cast(beforeChars), beforeLength, - static_cast(afterChars), afterLength, score); + int beforeCodePoints[beforeLength]; + int afterCodePoints[afterLength]; + env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); + env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); + return Correction::RankingAlgorithm::calcNormalizedScore(beforeCodePoints, beforeLength, + afterCodePoints, afterLength, score); } -static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, - jcharArray before, jcharArray after) { +static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jintArray before, + jintArray after) { jsize beforeLength = env->GetArrayLength(before); jsize afterLength = env->GetArrayLength(after); - jchar beforeChars[beforeLength]; - jchar afterChars[afterLength]; - env->GetCharArrayRegion(before, 0, beforeLength, beforeChars); - env->GetCharArrayRegion(after, 0, afterLength, afterChars); - return Correction::RankingAlgorithm::editDistance( - static_cast(beforeChars), beforeLength, - static_cast(afterChars), afterLength); + int beforeCodePoints[beforeLength]; + int afterCodePoints[afterLength]; + env->GetIntArrayRegion(before, 0, beforeLength, beforeCodePoints); + env->GetIntArrayRegion(after, 0, afterLength, afterCodePoints); + return Correction::RankingAlgorithm::editDistance(beforeCodePoints, beforeLength, + afterCodePoints, afterLength); } static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) { @@ -279,15 +275,15 @@ static JNINativeMethod sMethods[] = { {"openNative", "(Ljava/lang/String;JJIIII)J", reinterpret_cast(latinime_BinaryDictionary_open)}, {"closeNative", "(J)V", reinterpret_cast(latinime_BinaryDictionary_close)}, - {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[C[I[I[I)I", + {"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I", reinterpret_cast(latinime_BinaryDictionary_getSuggestions)}, {"getFrequencyNative", "(J[I)I", reinterpret_cast(latinime_BinaryDictionary_getFrequency)}, {"isValidBigramNative", "(J[I[I)Z", reinterpret_cast(latinime_BinaryDictionary_isValidBigram)}, - {"calcNormalizedScoreNative", "([C[CI)F", + {"calcNormalizedScoreNative", "([I[II)F", reinterpret_cast(latinime_BinaryDictionary_calcNormalizedScore)}, - {"editDistanceNative", "([C[C)I", + {"editDistanceNative", "([I[I)I", reinterpret_cast(latinime_BinaryDictionary_editDistance)} }; diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index dade4f16b..f89dd1615 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -36,13 +36,13 @@ BigramDictionary::BigramDictionary(const unsigned char *dict, int maxWordLength, BigramDictionary::~BigramDictionary() { } -bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequency, - int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const { +bool BigramDictionary::addWordBigram(int *word, int length, int frequency, int *bigramFreq, + int *bigramCodePoints, int *outputTypes) const { word[length] = 0; if (DEBUG_DICT) { #ifdef FLAG_DBG char s[length + 1]; - for (int i = 0; i <= length; i++) s[i] = word[i]; + for (int i = 0; i <= length; i++) s[i] = static_cast(word[i]); AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency); #endif } @@ -51,7 +51,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ int insertAt = 0; while (insertAt < MAX_PREDICTIONS) { if (frequency > bigramFreq[insertAt] || (bigramFreq[insertAt] == frequency - && length < Dictionary::wideStrLen(bigramChars + insertAt * MAX_WORD_LENGTH))) { + && length < Dictionary::wideStrLen( + bigramCodePoints + insertAt * MAX_WORD_LENGTH))) { break; } insertAt++; @@ -65,10 +66,10 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramFreq[0])); bigramFreq[insertAt] = frequency; outputTypes[insertAt] = Dictionary::KIND_PREDICTION; - memmove(bigramChars + (insertAt + 1) * MAX_WORD_LENGTH, - bigramChars + insertAt * MAX_WORD_LENGTH, - (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramChars[0]) * MAX_WORD_LENGTH); - unsigned short *dest = bigramChars + insertAt * MAX_WORD_LENGTH; + memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH, + bigramCodePoints + insertAt * MAX_WORD_LENGTH, + (MAX_PREDICTIONS - insertAt - 1) * sizeof(bigramCodePoints[0]) * MAX_WORD_LENGTH); + int *dest = bigramCodePoints + insertAt * MAX_WORD_LENGTH; while (length--) { *dest++ = *word++; } @@ -86,7 +87,7 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * prevWordLength: its length. * inputCodes: what user typed, in the same format as for UnigramDictionary::getSuggestions. * codesSize: the size of the codes array. - * bigramChars: an array for output, at the same format as outwords for getSuggestions. + * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * bigramFreq: an array to output frequencies. * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. @@ -97,8 +98,8 @@ bool BigramDictionary::addWordBigram(unsigned short *word, int length, int frequ * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ -int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, int *inputCodes, - int codesSize, unsigned short *bigramChars, int *bigramFreq, int *outputTypes) const { +int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodes, + int codesSize, int *bigramCodePoints, int *bigramFreq, int *outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name @@ -117,7 +118,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in int bigramCount = 0; do { bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - uint16_t bigramBuffer[MAX_WORD_LENGTH]; + int bigramBuffer[MAX_WORD_LENGTH]; int unigramFreq = 0; const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags, &pos); @@ -134,7 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in // here, but it can't get too bad. const int frequency = BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp); - if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramChars, + if (addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints, outputTypes)) { ++bigramCount; } @@ -190,12 +191,12 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *p } while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags)); } -bool BigramDictionary::checkFirstCharacter(unsigned short *word, int *inputCodes) const { +bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodes) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. int maxAlt = MAX_ALTERNATIVES; - const unsigned short firstBaseChar = toBaseLowerCase(*word); + const int firstBaseChar = toBaseLowerCase(*word); while (maxAlt > 0) { if (toBaseLowerCase(*inputCodes) == firstBaseChar) { return true; diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h index 5f11ae822..150192de2 100644 --- a/native/jni/src/bigram_dictionary.h +++ b/native/jni/src/bigram_dictionary.h @@ -27,23 +27,23 @@ namespace latinime { class BigramDictionary { public: BigramDictionary(const unsigned char *dict, int maxWordLength, int maxPredictions); - int getBigrams(const int32_t *word, int length, int *inputCodes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const; - void fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord, const int prevWordLength, + int getBigrams(const int *word, int length, int *inputCodes, int codesSize, int *outWords, + int *frequencies, int *outputTypes) const; + void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength, std::map *map, uint8_t *filter) const; - bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; + bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary); - bool addWordBigram(unsigned short *word, int length, int frequency, - int *bigramFreq, unsigned short *bigramChars, int *outputTypes) const; + bool addWordBigram(int *word, int length, int frequency, int *bigramFreq, int *bigramCodePoints, + int *outputTypes) const; int getBigramAddress(int *pos, bool advance); int getBigramFreq(int *pos); void searchForTerminalNode(int addressLookingFor, int frequency); bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } - bool checkFirstCharacter(unsigned short *word, int *inputCodes) const; - int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength, + bool checkFirstCharacter(int *word, int *inputCodes) const; + int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; const unsigned char *DICT; diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index eec52e323..c0aec50d7 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -84,7 +84,7 @@ class BinaryFormat { static unsigned int getFlags(const uint8_t *const dict); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); - static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); + static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); @@ -98,10 +98,10 @@ class BinaryFormat { static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); static int getAttributeFrequencyFromFlags(const int flags); - static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, + static int getTerminalPosition(const uint8_t *const root, const int *const inWord, const int length, const bool forceLowerCaseSearch); static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, - uint16_t *outWord, int *outUnigramFrequency); + int *outWord, int *outUnigramFrequency); static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq); static int getProbability(const int position, const std::map *bigramMap, const uint8_t *bigramFilter, const int unigramFreq); @@ -176,17 +176,17 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict return dict[(*pos)++]; } -inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { +inline int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; - const int32_t codePoint = dict[origin]; + const int codePoint = dict[origin]; if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) { *pos = origin + 1; return NOT_A_CODE_POINT; } else { *pos = origin + 3; - const int32_t char_1 = codePoint << 16; - const int32_t char_2 = char_1 + (dict[origin + 1] << 8); + const int char_1 = codePoint << 16; + const int char_2 = char_1 + (dict[origin + 1] << 8); return char_2 + dict[origin + 2]; } } else { @@ -202,7 +202,7 @@ inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const inline int BinaryFormat::skipOtherCharacters(const uint8_t *const dict, const int pos) { int currentPos = pos; - int32_t character = dict[currentPos++]; + int character = dict[currentPos++]; while (CHARACTER_ARRAY_TERMINATOR != character) { if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { currentPos += MULTIPLE_BYTE_CHARACTER_ADDITIONAL_SIZE; @@ -352,8 +352,8 @@ inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) { // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. -inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, - const int32_t *const inWord, const int length, const bool forceLowerCaseSearch) { +inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, const int *const inWord, + const int length, const bool forceLowerCaseSearch) { int pos = 0; int wordPos = 0; @@ -362,14 +362,14 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, // there was no match (or we would have found it). if (wordPos >= length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const int32_t wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; + const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. if (0 >= charGroupCount) return NOT_VALID_WORD; const int charGroupPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + int character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is @@ -439,7 +439,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, * Return value : the length of the word, of 0 if the word was not found. */ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address, - const int maxDepth, uint16_t *outWord, int *outUnigramFrequency) { + const int maxDepth, int *outWord, int *outUnigramFrequency) { int pos = 0; int wordPos = 0; @@ -457,13 +457,13 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a --charGroupCount) { const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); - const int32_t character = getCodePointAndForwardPointer(root, &pos); + const int character = getCodePointAndForwardPointer(root, &pos); if (address == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { - int32_t nextChar = getCodePointAndForwardPointer(root, &pos); + int nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; @@ -522,13 +522,12 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a if (0 != lastCandidateGroupPos) { const uint8_t lastFlags = getFlagsAndForwardPointer(root, &lastCandidateGroupPos); - const int32_t lastChar = + const int lastChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { - int32_t nextChar = - getCodePointAndForwardPointer(root, &lastCandidateGroupPos); + int nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; while (-1 != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index 9008e364c..b42de6607 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -18,22 +18,23 @@ #define LATINIME_CHAR_UTILS_H #include -#include + +#include "defines.h" namespace latinime { -inline static bool isAsciiUpper(unsigned short c) { +inline static bool isAsciiUpper(int c) { // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). return (c >= 'A' && c <= 'Z'); } -inline static unsigned short toAsciiLower(unsigned short c) { +inline static int toAsciiLower(int c) { return c - 'A' + 'a'; } -inline static bool isAscii(unsigned short c) { - return isascii(static_cast(c)) != 0; +inline static bool isAscii(int c) { + return isascii(c) != 0; } unsigned short latin_tolower(const unsigned short c); @@ -44,33 +45,32 @@ unsigned short latin_tolower(const unsigned short c); * if c is not a combined character, or the base character if it * is combined. */ - static const int BASE_CHARS_SIZE = 0x0500; -extern const uint16_t BASE_CHARS[BASE_CHARS_SIZE]; +extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; -inline static unsigned short toBaseChar(unsigned short c) { +inline static int toBaseCodePoint(int c) { if (c < BASE_CHARS_SIZE) { - return BASE_CHARS[c]; + return static_cast(BASE_CHARS[c]); } return c; } -inline static unsigned short toLowerCase(const unsigned short c) { +inline static int toLowerCase(const int c) { if (isAsciiUpper(c)) { return toAsciiLower(c); } else if (isAscii(c)) { return c; } - return latin_tolower(c); + return static_cast(latin_tolower(static_cast(c))); } -inline static unsigned short toBaseLowerCase(const unsigned short c) { - return toLowerCase(toBaseChar(c)); +inline static int toBaseLowerCase(const int c) { + return toLowerCase(toBaseCodePoint(c)); } -inline static bool isSkippableChar(const uint16_t character) { +inline static bool isSkippableCodePoint(const int codePoint) { // TODO: Do not hardcode here - return character == '\'' || character == '-'; + return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; } } // namespace latinime diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index d57b0e370..949158a0b 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -60,8 +60,8 @@ inline static void dumpEditDistance10ForDebug(int *editDistanceTable, } } -inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigned short *input, - const int inputSize, const unsigned short *output, const int outputLength) { +inline static void calcEditDistanceOneStep(int *editDistanceTable, const int *input, + const int inputSize, const int *output, const int outputLength) { // TODO: Make sure that editDistance[0 ~ MAX_WORD_LENGTH_INTERNAL] is not touched. // Let dp[i][j] be editDistanceTable[i * (inputSize + 1) + j]. // Assuming that dp[0][0] ... dp[outputLength - 1][inputSize] are already calculated, @@ -71,10 +71,10 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne const int *const prevprev = outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; current[0] = outputLength; - const uint32_t co = toBaseLowerCase(output[outputLength - 1]); - const uint32_t prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; + const int co = toBaseLowerCase(output[outputLength - 1]); + const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; for (int i = 1; i <= inputSize; ++i) { - const uint32_t ci = toBaseLowerCase(input[i - 1]); + const int ci = toBaseLowerCase(input[i - 1]); const uint16_t cost = (ci == co) ? 0 : 1; current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { @@ -94,11 +94,9 @@ inline static int getCurrentEditDistance(int *editDistanceTable, const int editD ////////////////////// // inline functions // ////////////////////// -static const char SINGLE_QUOTE = '\''; - -inline bool Correction::isSingleQuote(const unsigned short c) { - const unsigned short userTypedChar = mProximityInfoState.getPrimaryCharAt(mInputIndex); - return (c == SINGLE_QUOTE && userTypedChar != SINGLE_QUOTE); +inline bool Correction::isSingleQuote(const int c) { + const int userTypedChar = mProximityInfoState.getPrimaryCodePointAt(mInputIndex); + return (c == KEYCODE_SINGLE_QUOTE && userTypedChar != KEYCODE_SINGLE_QUOTE); } //////////////// @@ -162,22 +160,22 @@ bool Correction::sameAsTyped() { } int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, - const int wordCount, const bool isSpaceProximity, const unsigned short *word) { + const int wordCount, const bool isSpaceProximity, const int *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, wordCount, this, isSpaceProximity, word); } -int Correction::getFinalProbability(const int probability, unsigned short **word, int *wordLength) { +int Correction::getFinalProbability(const int probability, int **word, int *wordLength) { return getFinalProbabilityInternal(probability, word, wordLength, mInputSize); } -int Correction::getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputSize) { +int Correction::getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, + const int inputSize) { return getFinalProbabilityInternal(probability, word, wordLength, inputSize); } -int Correction::getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputSize) { +int Correction::getFinalProbabilityInternal(const int probability, int **word, int *wordLength, + const int inputSize) { const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; @@ -273,15 +271,15 @@ bool Correction::needsToPrune() const { || (!mDoAutoCompletion && (mOutputIndex > mInputSize)); } -void Correction::addCharToCurrentWord(const int32_t c) { +void Correction::addCharToCurrentWord(const int c) { mWord[mOutputIndex] = c; - const unsigned short *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); - calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, - mWord, mOutputIndex + 1); + const int *primaryInputWord = mProximityInfoState.getPrimaryInputWord(); + calcEditDistanceOneStep(mEditDistanceTable, primaryInputWord, mInputSize, mWord, + mOutputIndex + 1); } -Correction::CorrectionType Correction::processSkipChar( - const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { +Correction::CorrectionType Correction::processSkipChar(const int c, const bool isTerminal, + const bool inputIndexIncremented) { addCharToCurrentWord(c); mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalOutputIndex = mOutputIndex; @@ -309,8 +307,7 @@ inline bool isProximityCharOrEquivalentChar(ProximityType type) { return type == EQUIVALENT_CHAR || type == NEAR_PROXIMITY_CHAR; } -Correction::CorrectionType Correction::processCharAndCalcState( - const int32_t c, const bool isTerminal) { +Correction::CorrectionType Correction::processCharAndCalcState(const int c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); if (correctionCount > mMaxErrors) { return processUnrelatedCorrectionType(); @@ -628,10 +625,10 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } -inline static int getQuoteCount(const unsigned short *word, const int length) { +inline static int getQuoteCount(const int *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { - if (word[i] == SINGLE_QUOTE) { + if (word[i] == KEYCODE_SINGLE_QUOTE) { ++quoteCount; } } @@ -639,7 +636,7 @@ inline static int getQuoteCount(const unsigned short *word, const int length) { } inline static bool isUpperCase(unsigned short c) { - return isAsciiUpper(toBaseChar(c)); + return isAsciiUpper(toBaseCodePoint(c)); } ////////////////////// @@ -672,7 +669,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex // TODO: use mExcessiveCount const int matchCount = inputSize - correction->mProximityCount - excessiveCount; - const unsigned short *word = correction->mWord; + const int *word = correction->mWord; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) @@ -911,7 +908,7 @@ int Correction::RankingAlgorithm::calculateFinalProbability(const int inputIndex /* static */ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, - const Correction *correction, const bool isSpaceProximity, const unsigned short *word) { + const Correction *correction, const bool isSpaceProximity, const int *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; @@ -1040,9 +1037,8 @@ int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( } /* Damerau-Levenshtein distance */ -inline static int editDistanceInternal( - int *editDistanceTable, const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength) { +inline static int editDistanceInternal(int *editDistanceTable, const int *before, + const int beforeLength, const int *after, const int afterLength) { // dp[li][lo] dp[a][b] = dp[ a * lo + b] int *dp = editDistanceTable; const int li = beforeLength + 1; @@ -1056,9 +1052,9 @@ inline static int editDistanceInternal( for (int i = 0; i < li - 1; ++i) { for (int j = 0; j < lo - 1; ++j) { - const uint32_t ci = toBaseLowerCase(before[i]); - const uint32_t co = toBaseLowerCase(after[j]); - const uint16_t cost = (ci == co) ? 0 : 1; + const int ci = toBaseLowerCase(before[i]); + const int co = toBaseLowerCase(after[j]); + const int cost = (ci == co) ? 0 : 1; dp[(i + 1) * lo + (j + 1)] = min(dp[i * lo + (j + 1)] + 1, min(dp[(i + 1) * lo + j] + 1, dp[i * lo + j] + cost)); if (i > 0 && j > 0 && ci == toBaseLowerCase(after[j - 1]) @@ -1080,8 +1076,8 @@ inline static int editDistanceInternal( return dp[li * lo - 1]; } -int Correction::RankingAlgorithm::editDistance(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength) { +int Correction::RankingAlgorithm::editDistance(const int *before, const int beforeLength, + const int *after, const int afterLength) { int table[(beforeLength + 1) * (afterLength + 1)]; return editDistanceInternal(table, before, beforeLength, after, afterLength); } @@ -1109,9 +1105,8 @@ int Correction::RankingAlgorithm::editDistance(const unsigned short *before, // So, we can normalize original score by dividing powf(2, min(b.l(),a.l())) * 255 * 2. /* static */ -float Correction::RankingAlgorithm::calcNormalizedScore(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength, - const int score) { +float Correction::RankingAlgorithm::calcNormalizedScore(const int *before, const int beforeLength, + const int *after, const int afterLength, const int score) { if (0 == beforeLength || 0 == afterLength) { return 0; } diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index a099853e6..fff24b048 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -78,14 +78,13 @@ class Correction { return ++mTotalTraverseCount; } - int getFreqForSplitMultipleWords( - const int *freqArray, const int *wordLengthArray, const int wordCount, - const bool isSpaceProximity, const unsigned short *word); - int getFinalProbability(const int probability, unsigned short **word, int *wordLength); - int getFinalProbabilityForSubQueue(const int probability, unsigned short **word, - int *wordLength, const int inputSize); + int getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, + const int wordCount, const bool isSpaceProximity, const int *word); + int getFinalProbability(const int probability, int **word, int *wordLength); + int getFinalProbabilityForSubQueue(const int probability, int **word, int *wordLength, + const int inputSize); - CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); + CorrectionType processCharAndCalcState(const int c, const bool isTerminal); ///////////////////////// // Tree helper methods @@ -110,28 +109,28 @@ class Correction { const int inputSize); static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction *correction, const bool isSpaceProximity, - const unsigned short *word); - static float calcNormalizedScore(const unsigned short *before, const int beforeLength, - const unsigned short *after, const int afterLength, const int score); - static int editDistance(const unsigned short *before, - const int beforeLength, const unsigned short *after, const int afterLength); + const int *word); + static float calcNormalizedScore(const int *before, const int beforeLength, + const int *after, const int afterLength, const int score); + static int editDistance(const int *before, const int beforeLength, const int *after, + const int afterLength); private: static const int MAX_INITIAL_SCORE = 255; }; // proximity info state - void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes, + void initInputParams(const ProximityInfo *proximityInfo, const int *inputCodes, const int inputSize, const int *xCoordinates, const int *yCoordinates) { mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH, proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false); } - const unsigned short *getPrimaryInputWord() const { + const int *getPrimaryInputWord() const { return mProximityInfoState.getPrimaryInputWord(); } - unsigned short getPrimaryCharAt(const int index) const { - return mProximityInfoState.getPrimaryCharAt(index); + int getPrimaryCodePointAt(const int index) const { + return mProximityInfoState.getPrimaryCodePointAt(index); } private: @@ -214,13 +213,13 @@ class Correction { inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); - inline bool isSingleQuote(const unsigned short c); - inline CorrectionType processSkipChar( - const int32_t c, const bool isTerminal, const bool inputIndexIncremented); + inline bool isSingleQuote(const int c); + inline CorrectionType processSkipChar(const int c, const bool isTerminal, + const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); - inline void addCharToCurrentWord(const int32_t c); - inline int getFinalProbabilityInternal(const int probability, unsigned short **word, - int *wordLength, const int inputSize); + inline void addCharToCurrentWord(const int c); + inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength, + const int inputSize); static const int TYPED_LETTER_MULTIPLIER = 2; static const int FULL_WORD_MULTIPLIER = 2; @@ -240,7 +239,7 @@ class Correction { uint8_t mTotalTraverseCount; // The following arrays are state buffer. - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH_INTERNAL]; int mDistances[MAX_WORD_LENGTH_INTERNAL]; // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N. diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 942068a49..095487416 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -30,17 +30,15 @@ #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) do { \ dumpResult(words, frequencies, maxWordCount, maxWordLength); } while (0) #define DUMP_WORD(word, length) do { dumpWord(word, length); } while (0) -#define DUMP_WORD_INT(word, length) do { dumpWordInt(word, length); } while (0) -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) do { \ - shortArrayToCharArray(input, length, output); } while (0) +#define INTS_TO_CHARS(input, length, output) do { \ + intArrayToCharArray(input, length, output); } while (0) -static inline void dumpWordInfo(const unsigned short *word, const int length, - const int rank, const int frequency) { +static inline void dumpWordInfo(const int *word, const int length, const int rank, + const int frequency) { static char charBuf[50]; int i = 0; for (; i < length; ++i) { - const unsigned short c = word[i]; + const int c = word[i]; if (c == 0) { break; } @@ -53,8 +51,7 @@ static inline void dumpWordInfo(const unsigned short *word, const int length, } } -static inline void dumpResult( - const unsigned short *outWords, const int *frequencies, const int maxWordCounts, +static inline void dumpResult(const int *outWords, const int *frequencies, const int maxWordCounts, const int maxWordLength) { AKLOGI("--- DUMP RESULT ---------"); for (int i = 0; i < maxWordCounts; ++i) { @@ -63,11 +60,11 @@ static inline void dumpResult( AKLOGI("-------------------------"); } -static inline void dumpWord(const unsigned short *word, const int length) { +static inline void dumpWord(const int *word, const int length) { static char charBuf[50]; int i = 0; for (; i < length; ++i) { - const unsigned short c = word[i]; + const int c = word[i]; if (c == 0) { break; } @@ -80,22 +77,10 @@ static inline void dumpWord(const unsigned short *word, const int length) { } } -static inline void dumpWordInt(const int *word, const int length) { - static char charBuf[50]; - - for (int i = 0; i < length; ++i) { - charBuf[i] = word[i]; - } - charBuf[length] = 0; - AKLOGI("i[ %s ]", charBuf); -} - -// TODO: Change this to intArrayToCharArray -static inline void shortArrayToCharArray( - const unsigned short *input, const int length, char *output) { +static inline void intArrayToCharArray(const int *input, const int length, char *output) { int i = 0; - for (;i < length; ++i) { - const unsigned short c = input[i]; + for (; i < length; ++i) { + const int c = input[i]; if (c == 0) { break; } @@ -137,11 +122,9 @@ static inline void showStackTrace() { #define AKLOGI(fmt, ...) #define DUMP_RESULT(words, frequencies, maxWordCount, maxWordLength) #define DUMP_WORD(word, length) -#define DUMP_WORD_INT(word, length) #define ASSERT(success) #define SHOW_STACK_TRACE -// TODO: INTS_TO_CHARS -#define SHORTS_TO_CHARS(input, length, output) +#define INTS_TO_CHARS(input, length, output) #endif #ifdef FLAG_DO_PROFILE @@ -286,6 +269,8 @@ static inline void prof_out(void) { #define NOT_A_PROBABILITY (-1) #define KEYCODE_SPACE ' ' +#define KEYCODE_SINGLE_QUOTE '\'' +#define KEYCODE_HYPHEN_MINUS '-' #define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true diff --git a/native/jni/src/dictionary.cpp b/native/jni/src/dictionary.cpp index 81789ccfc..5fbe0461b 100644 --- a/native/jni/src/dictionary.cpp +++ b/native/jni/src/dictionary.cpp @@ -54,11 +54,10 @@ Dictionary::~Dictionary() { } int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, - int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, - int *codes, int codesSize, int *prevWordChars, - int prevWordLength, int commitPoint, bool isGesture, - bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) const { + int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes, + int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, + bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, + int *outputTypes) const { int result = 0; if (isGesture) { DicTraverseWrapper::initDicTraverseSession( @@ -83,7 +82,7 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, void *traverseSessi } int Dictionary::getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const { + int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; return mBigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, outputTypes); diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index 120ca5f7f..2ca00ab63 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -47,11 +47,11 @@ class Dictionary { int getSuggestions(ProximityInfo *proximityInfo, void *traverseSession, int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *codes, int codesSize, int *prevWordChars, int prevWordLength, int commitPoint, bool isGesture, - bool useFullEditDistance, unsigned short *outWords, - int *frequencies, int *spaceIndices, int *outputTypes) const; + bool useFullEditDistance, int *outWords, int *frequencies, int *spaceIndices, + int *outputTypes) const; - int getBigrams(const int32_t *word, int length, int *codes, int codesSize, - unsigned short *outWords, int *frequencies, int *outputTypes) const; + int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int *outWords, + int *frequencies, int *outputTypes) const; int getFrequency(const int32_t *word, int length) const; bool isValidBigram(const int32_t *word1, int length1, const int32_t *word2, int length2) const; @@ -68,7 +68,7 @@ class Dictionary { // public static utility methods // static inline methods should be defined in the header file - static int wideStrLen(unsigned short *str); + static int wideStrLen(int *str); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); @@ -88,7 +88,7 @@ class Dictionary { // public static utility methods // static inline methods should be defined in the header file -inline int Dictionary::wideStrLen(unsigned short *str) { +inline int Dictionary::wideStrLen(int *str) { if (!str) return 0; int length = 0; while (*str) { diff --git a/native/jni/src/gesture/gesture_decoder_wrapper.h b/native/jni/src/gesture/gesture_decoder_wrapper.h index 92e1ded49..eb80bd2e2 100644 --- a/native/jni/src/gesture/gesture_decoder_wrapper.h +++ b/native/jni/src/gesture/gesture_decoder_wrapper.h @@ -38,15 +38,14 @@ class GestureDecoderWrapper : public IncrementalDecoderInterface { } int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, - int *outputTypes) const { + int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, + int *frequencies, int *outputIndices, int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } - return mIncrementalDecoderInterface->getSuggestions( - pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, - inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); + return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs, + inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } static void setGestureDecoderFactoryMethod( diff --git a/native/jni/src/gesture/incremental_decoder_interface.h b/native/jni/src/gesture/incremental_decoder_interface.h index d1395aab9..e41513dbc 100644 --- a/native/jni/src/gesture/incremental_decoder_interface.h +++ b/native/jni/src/gesture/incremental_decoder_interface.h @@ -28,10 +28,9 @@ class ProximityInfo; class IncrementalDecoderInterface { public: - virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, - int *inputXs, int *inputYs, int *times, int *pointerIds, int *codes, - int inputSize, int commitPoint, unsigned short *outWords, int *frequencies, - int *outputIndices, int *outputTypes) const = 0; + virtual int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, + int *inputYs, int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, + int *outWords, int *frequencies, int *outputIndices, int *outputTypes) const = 0; IncrementalDecoderInterface() { }; virtual ~IncrementalDecoderInterface() { }; private: diff --git a/native/jni/src/gesture/incremental_decoder_wrapper.h b/native/jni/src/gesture/incremental_decoder_wrapper.h index da7afdb8a..691d4952d 100644 --- a/native/jni/src/gesture/incremental_decoder_wrapper.h +++ b/native/jni/src/gesture/incremental_decoder_wrapper.h @@ -38,15 +38,14 @@ class IncrementalDecoderWrapper : public IncrementalDecoderInterface { } int getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, - int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, - unsigned short *outWords, int *frequencies, int *outputIndices, - int *outputTypes) const { + int *times, int *pointerIds, int *codes, int inputSize, int commitPoint, int *outWords, + int *frequencies, int *outputIndices, int *outputTypes) const { if (!mIncrementalDecoderInterface) { return 0; } - return mIncrementalDecoderInterface->getSuggestions( - pInfo, traverseSession, inputXs, inputYs, times, pointerIds, codes, - inputSize, commitPoint, outWords, frequencies, outputIndices, outputTypes); + return mIncrementalDecoderInterface->getSuggestions(pInfo, traverseSession, inputXs, + inputYs, times, pointerIds, codes, inputSize, commitPoint, outWords, frequencies, + outputIndices, outputTypes); } static void setIncrementalDecoderFactoryMethod( diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index d41acdace..987a27b80 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -34,7 +34,7 @@ const float ProximityInfoState::NOT_A_DISTANCE_FLOAT = -1.0f; const int ProximityInfoState::NOT_A_CODE = -1; void ProximityInfoState::initInputParams(const int pointerId, const float maxPointToKeyLength, - const ProximityInfo *proximityInfo, const int32_t *const inputCodes, const int inputSize, + const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, const int *const xCoordinates, const int *const yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric) { @@ -63,7 +63,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi // - mNormalizedSquaredDistances // TODO: Merge for (int i = 0; i < inputSize; ++i) { - const int32_t primaryKey = inputCodes[i]; + const int primaryKey = inputCodes[i]; const int x = xCoordinates[i]; const int y = yCoordinates[i]; int *proximities = &mInputCodes[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL]; @@ -146,7 +146,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi AKLOGI("Init ProximityInfoState: (%d)PID = %d", i, pid); } if (pointerId == pid) { - const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCharAt(i); + const int c = isGeometric ? NOT_A_COORDINATE : getPrimaryCodePointAt(i); const int x = proximityOnly ? NOT_A_COORDINATE : xCoordinates[i]; const int y = proximityOnly ? NOT_A_COORDINATE : yCoordinates[i]; const int time = times ? times[i] : -1; @@ -306,12 +306,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi && xCoordinates && yCoordinates; if (!isGeometric && pointerId == 0) { for (int i = 0; i < inputSize; ++i) { - mPrimaryInputWord[i] = getPrimaryCharAt(i); + mPrimaryInputWord[i] = getPrimaryCodePointAt(i); } for (int i = 0; i < mInputSize && mTouchPositionCorrectionEnabled; ++i) { - const int *proximityChars = getProximityCharsAt(i); - const int primaryKey = proximityChars[0]; + const int *proximityCodePoints = getProximityCodePointsAt(i); + const int primaryKey = proximityCodePoints[0]; const int x = xCoordinates[i]; const int y = yCoordinates[i]; if (DEBUG_PROXIMITY_CHARS) { @@ -319,11 +319,12 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi a += 0; AKLOGI("--- Primary = %c, x = %d, y = %d", primaryKey, x, y); } - for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityChars[j] > 0; ++j) { - const int currentChar = proximityChars[j]; + for (int j = 0; j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL && proximityCodePoints[j] > 0; + ++j) { + const int currentCodePoint = proximityCodePoints[j]; const float squaredDistance = hasInputCoordinates() ? calculateNormalizedSquaredDistance( - mProximityInfo->getKeyIndexOf(currentChar), i) : + mProximityInfo->getKeyIndexOf(currentCodePoint), i) : NOT_A_DISTANCE_FLOAT; if (squaredDistance >= 0.0f) { mNormalizedSquaredDistances[i * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + j] = @@ -334,7 +335,7 @@ void ProximityInfoState::initInputParams(const int pointerId, const float maxPoi PROXIMITY_CHAR_WITHOUT_DISTANCE_INFO; } if (DEBUG_PROXIMITY_CHARS) { - AKLOGI("--- Proximity (%d) = %c", j, currentChar); + AKLOGI("--- Proximity (%d) = %c", j, currentCodePoint); } } } @@ -449,7 +450,7 @@ float ProximityInfoState::getPointScore( // Sampling touch point and pushing information to vectors. // Returning if previous point is popped or not. -bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, +bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time, const bool sample, const bool isLastPoint, const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, @@ -458,7 +459,7 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar size_t size = mInputXs.size(); bool popped = false; - if (nodeChar < 0 && sample) { + if (nodeCodePoint < 0 && sample) { const float nearest = updateNearKeysDistances(x, y, currentNearKeysDistances); const float score = getPointScore(x, y, time, isLastPoint, nearest, sumAngle, currentNearKeysDistances, prevNearKeysDistances, prevPrevNearKeysDistances); @@ -487,8 +488,8 @@ bool ProximityInfoState::pushTouchPoint(const int inputIndex, const int nodeChar } } - if (nodeChar >= 0 && (x < 0 || y < 0)) { - const int keyId = mProximityInfo->getKeyIndexOf(nodeChar); + if (nodeCodePoint >= 0 && (x < 0 || y < 0)) { + const int keyId = mProximityInfo->getKeyIndexOf(nodeCodePoint); if (keyId >= 0) { x = mProximityInfo->getKeyCenterXOfKeyIdG(keyId); y = mProximityInfo->getKeyCenterYOfKeyIdG(keyId); @@ -543,7 +544,7 @@ float ProximityInfoState::getPointToKeyLength(const int inputIndex, const int co const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; return min(mDistanceCache[index], mMaxPointToKeyLength); } - if (isSkippableChar(codePoint)) { + if (isSkippableCodePoint(codePoint)) { return 0.0f; } // If the char is not a key on the keyboard then return the max length. @@ -960,9 +961,9 @@ bool ProximityInfoState::suppressCharProbabilities(const int index0, const int i return true; } -// Get a word that is detected by tracing highest probability sequence into charBuf and returns -// probability of generating the word. -float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) const { +// Get a word that is detected by tracing highest probability sequence into codePointBuf and +// returns probability of generating the word. +float ProximityInfoState::getHighestProbabilitySequence(int *const codePointBuf) const { static const float DEMOTION_LOG_PROBABILITY = 0.3f; int index = 0; float sumLogProbability = 0.0f; @@ -980,12 +981,12 @@ float ProximityInfoState::getHighestProbabilitySequence(uint16_t *const charBuf) } } if (character != NOT_AN_INDEX) { - charBuf[index] = mProximityInfo->getCodePointOf(character); + codePointBuf[index] = mProximityInfo->getCodePointOf(character); index++; } sumLogProbability += minLogProbability; } - charBuf[index] = '\0'; + codePointBuf[index] = '\0'; return sumLogProbability; } diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 1a3f2869d..14fe2d3f5 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -43,7 +43,7 @@ class ProximityInfoState { // Defined in proximity_info_state.cpp // ///////////////////////////////////////// void initInputParams(const int pointerId, const float maxPointToKeyLength, - const ProximityInfo *proximityInfo, const int32_t *const inputCodes, + const ProximityInfo *proximityInfo, const int *const inputCodes, const int inputSize, const int *xCoordinates, const int *yCoordinates, const int *const times, const int *const pointerIds, const bool isGeometric); @@ -65,15 +65,15 @@ class ProximityInfoState { virtual ~ProximityInfoState() {} - inline unsigned short getPrimaryCharAt(const int index) const { - return getProximityCharsAt(index)[0]; + inline int getPrimaryCodePointAt(const int index) const { + return getProximityCodePointsAt(index)[0]; } - inline bool existsCharInProximityAt(const int index, const int c) const { - const int *chars = getProximityCharsAt(index); + inline bool existsCodePointInProximityAt(const int index, const int c) const { + const int *codePoints = getProximityCodePointsAt(index); int i = 0; - while (chars[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { - if (chars[i++] == c) { + while (codePoints[i] > 0 && i < MAX_PROXIMITY_CHARS_SIZE_INTERNAL) { + if (codePoints[i++] == c) { return true; } } @@ -82,13 +82,13 @@ class ProximityInfoState { inline bool existsAdjacentProximityChars(const int index) const { if (index < 0 || index >= mInputSize) return false; - const int currentChar = getPrimaryCharAt(index); + const int currentCodePoint = getPrimaryCodePointAt(index); const int leftIndex = index - 1; - if (leftIndex >= 0 && existsCharInProximityAt(leftIndex, currentChar)) { + if (leftIndex >= 0 && existsCodePointInProximityAt(leftIndex, currentCodePoint)) { return true; } const int rightIndex = index + 1; - if (rightIndex < mInputSize && existsCharInProximityAt(rightIndex, currentChar)) { + if (rightIndex < mInputSize && existsCodePointInProximityAt(rightIndex, currentCodePoint)) { return true; } return false; @@ -106,15 +106,15 @@ class ProximityInfoState { // Notice : accented characters do not have a proximity list, so they are alone // in their list. The non-accented version of the character should be considered // "close", but not the other keys close to the non-accented version. - inline ProximityType getMatchedProximityId(const int index, - const unsigned short c, const bool checkProximityChars, int *proximityIndex = 0) const { - const int *currentChars = getProximityCharsAt(index); - const int firstChar = currentChars[0]; - const unsigned short baseLowerC = toBaseLowerCase(c); + inline ProximityType getMatchedProximityId(const int index, const int c, + const bool checkProximityChars, int *proximityIndex = 0) const { + const int *currentCodePoints = getProximityCodePointsAt(index); + const int firstCodePoint = currentCodePoints[0]; + const int baseLowerC = toBaseLowerCase(c); // The first char in the array is what user typed. If it matches right away, // that means the user typed that same char for this pos. - if (firstChar == baseLowerC || firstChar == c) { + if (firstCodePoint == baseLowerC || firstCodePoint == c) { return EQUIVALENT_CHAR; } @@ -123,14 +123,14 @@ class ProximityInfoState { // If the non-accented, lowercased version of that first character matches c, // then we have a non-accented version of the accented character the user // typed. Treat it as a close char. - if (toBaseLowerCase(firstChar) == baseLowerC) + if (toBaseLowerCase(firstCodePoint) == baseLowerC) return NEAR_PROXIMITY_CHAR; // Not an exact nor an accent-alike match: search the list of close keys int j = 1; while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); if (matched) { if (proximityIndex) { *proximityIndex = j; @@ -140,11 +140,12 @@ class ProximityInfoState { ++j; } if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { ++j; while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentChars[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentChars[j] == baseLowerC || currentChars[j] == c); + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = + (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); if (matched) { if (proximityIndex) { *proximityIndex = j; @@ -165,7 +166,7 @@ class ProximityInfoState { inputIndex * MAX_PROXIMITY_CHARS_SIZE_INTERNAL + proximityIndex]; } - inline const unsigned short *getPrimaryInputWord() const { + inline const int *getPrimaryInputWord() const { return mPrimaryInputWord; } @@ -173,13 +174,13 @@ class ProximityInfoState { return mTouchPositionCorrectionEnabled; } - inline bool sameAsTyped(const unsigned short *word, int length) const { + inline bool sameAsTyped(const int *word, int length) const { if (length != mInputSize) { return false; } const int *inputCodes = mInputCodes; while (length--) { - if (static_cast(*inputCodes) != static_cast(*word)) { + if (*inputCodes != *word) { return false; } inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; @@ -236,7 +237,7 @@ class ProximityInfoState { // Returns angle of three points. x, y, and z are indices. float getPointsAngle(const int index0, const int index1, const int index2) const; - float getHighestProbabilitySequence(uint16_t *const charBuf) const; + float getHighestProbabilitySequence(int *const codePointBuf) const; float getProbability(const int index, const int charCode) const; @@ -255,7 +256,7 @@ class ProximityInfoState { float calculateSquaredDistanceFromSweetSpotCenter( const int keyIndex, const int inputIndex) const; - bool pushTouchPoint(const int inputIndex, const int nodeChar, int x, int y, const int time, + bool pushTouchPoint(const int inputIndex, const int nodeCodePoint, int x, int y, const int time, const bool sample, const bool isLastPoint, const float sumAngle, NearKeysDistanceMap *const currentNearKeysDistances, const NearKeysDistanceMap *const prevNearKeysDistances, @@ -269,7 +270,7 @@ class ProximityInfoState { return mInputXs.size() > 0 && mInputYs.size() > 0; } - inline const int *getProximityCharsAt(const int index) const { + inline const int *getProximityCodePointsAt(const int index) const { return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE_INTERNAL); } @@ -322,10 +323,10 @@ class ProximityInfoState { // inputs including the current input point. std::vector mSearchKeysVector; bool mTouchPositionCorrectionEnabled; - int32_t mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; + int mInputCodes[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mNormalizedSquaredDistances[MAX_PROXIMITY_CHARS_SIZE_INTERNAL * MAX_WORD_LENGTH_INTERNAL]; int mInputSize; - unsigned short mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; + int mPrimaryInputWord[MAX_WORD_LENGTH_INTERNAL]; }; } // namespace latinime #endif // LATINIME_PROXIMITY_INFO_STATE_H diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index e72e7e3be..fed3c7251 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -43,18 +43,16 @@ class TerminalAttributes { return mHasNextShortcutTarget; } - // Gets the shortcut target itself as a uint16_t string. For parameters and return value + // Gets the shortcut target itself as an int string. For parameters and return value // see BinaryFormat::getWordAtAddress. - // TODO: make the output an uint32_t* to handle the whole unicode range. - inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) { + inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); - mHasNextShortcutTarget = - 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); + mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { const int codePoint = BinaryFormat::getCodePointAndForwardPointer(mDict, &mPos); if (NOT_A_CODE_POINT == codePoint) break; - outWord[i] = (uint16_t)codePoint; + outWord[i] = codePoint; } *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); return i; diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index f1fd1389a..dadc9c897 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -55,13 +55,13 @@ UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, int fullW UnigramDictionary::~UnigramDictionary() { } -static inline unsigned int getCodesBufferSize(const int *codes, const int codesSize) { - return static_cast(sizeof(*codes)) * codesSize; +static inline int getCodesBufferSize(const int *codes, const int codesSize) { + return sizeof(*codes) * codesSize; } -// TODO: This needs to take a const unsigned short* and not tinker with its contents -static inline void addWord(unsigned short *word, int length, int frequency, - WordsPriorityQueue *queue, int type) { +// TODO: This needs to take a const int* and not tinker with its contents +static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, + int type) { queue->push(frequency, word, length, type); } @@ -171,9 +171,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies, - int *outputTypes) const { - + const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const { WordsPriorityQueuePool queuePool(MAX_WORDS, SUB_QUEUE_MAX_WORDS, MAX_WORD_LENGTH); queuePool.clearAll(); Correction masterCorrection; @@ -218,7 +216,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { - short unsigned int *w = outWords + j * MAX_WORD_LENGTH; + int *w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; (void)s; // To suppress compiler warning @@ -230,12 +228,11 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x return suggestedWordsCount; } -void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputSize, const std::map *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool) const { - +void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int inputSize, + const std::map *bigramMap, const uint8_t *bigramFilter, + const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) + const { PROF_OPEN; PROF_START(0); PROF_END(0); @@ -284,7 +281,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord *sw = queue->top(); const int score = sw->mScore; - const unsigned short *word = sw->mWord; + const int *word = sw->mWord; const int wordLength = sw->mWordLength; float ns = Correction::RankingAlgorithm::calcNormalizedScore( correction->getPrimaryInputWord(), i, word, wordLength, score); @@ -303,7 +300,7 @@ void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int Correction *correction) const { if (DEBUG_DICT) { AKLOGI("initSuggest"); - DUMP_WORD_INT(codes, inputSize); + DUMP_WORD(codes, inputSize); } correction->initInputParams(proximityInfo, codes, inputSize, xCoordinates, yCoordinates); const int maxDepth = min(inputSize * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -376,7 +373,7 @@ inline void UnigramDictionary::onTerminal(const int probability, const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; int wordLength; - unsigned short *wordPointer; + int *wordPointer; if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); @@ -404,7 +401,7 @@ inline void UnigramDictionary::onTerminal(const int probability, // so that the insert order is protected inside the queue for words // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. - uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; int shortcutFrequency; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); @@ -444,7 +441,7 @@ int UnigramDictionary::getSubStringSuggestion( const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const { + int *wordLengthArray, int *outputWord, int *outputWordLength) const { if (inputWordLength > MULTIPLE_WORDS_SUGGESTION_MAX_WORD_LENGTH) { return FLAG_MULTIPLE_SUGGEST_ABORT; } @@ -487,13 +484,13 @@ int UnigramDictionary::getSubStringSuggestion( // TODO: Remove the safety net above // ////////////////////////////////////////////// - unsigned short *tempOutputWord = 0; + int *tempOutputWord = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputSize, correction); - unsigned short word[MAX_WORD_LENGTH_INTERNAL]; + int word[MAX_WORD_LENGTH_INTERNAL]; int freq = getMostFrequentWordLike( inputWordStartPos, inputWordLength, correction, word); if (freq > 0) { @@ -592,7 +589,7 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const bool useFullEditDistance, const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int outputWordLength, - int *freqArray, int *wordLengthArray, unsigned short *outputWord) const { + int *freqArray, int *wordLengthArray, int *outputWord) const { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; @@ -678,7 +675,7 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit } // Allocating fixed length array on stack - unsigned short outputWord[MAX_WORD_LENGTH]; + int outputWord[MAX_WORD_LENGTH]; int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; const int outputWordLength = 0; @@ -693,11 +690,11 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputSize, Correction *correction, unsigned short *word) const { - uint16_t inWord[inputSize]; + const int inputSize, Correction *correction, int *word) const { + int inWord[inputSize]; for (int i = 0; i < inputSize; ++i) { - inWord[i] = (uint16_t)correction->getPrimaryCharAt(startInputIndex + i); + inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i); } return getMostFrequentWordLikeInner(inWord, inputSize, word); } @@ -715,14 +712,14 @@ inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, // In and out parameters may point to the same location. This function takes care // not to use any input parameters after it wrote into its outputs. static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, - const uint8_t *const root, const int startPos, const uint16_t *const inWord, - const int startInputIndex, const int inputSize, int32_t *outNewWord, int *outInputIndex, + const uint8_t *const root, const int startPos, const int *const inWord, + const int startInputIndex, const int inputSize, int *outNewWord, int *outInputIndex, int *outPos) { const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; - int32_t codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); - int32_t baseChar = toBaseLowerCase(codePoint); - const uint16_t wChar = toBaseLowerCase(inWord[startInputIndex]); + int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); + int baseChar = toBaseLowerCase(codePoint); + const int wChar = toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; @@ -753,8 +750,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, // It will compare the frequency to the max frequency, and if greater, will // copy the word into the output buffer. In output value maxFreq, it will // write the new maximum frequency if it changed. -static inline void onTerminalWordLike(const int freq, int32_t *newWord, const int length, - short unsigned int *outWord, int *maxFreq) { +static inline void onTerminalWordLike(const int freq, int *newWord, const int length, int *outWord, + int *maxFreq) { if (freq > *maxFreq) { for (int q = 0; q < length; ++q) { outWord[q] = newWord[q]; @@ -766,9 +763,9 @@ static inline void onTerminalWordLike(const int freq, int32_t *newWord, const in // Will find the highest frequency of the words like the one passed as an argument, // that is, everything that only differs by case/accents. -int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord, - const int inputSize, short unsigned int *outWord) const { - int32_t newWord[MAX_WORD_LENGTH_INTERNAL]; +int UnigramDictionary::getMostFrequentWordLikeInner(const int *const inWord, const int inputSize, + int *outWord) const { + int newWord[MAX_WORD_LENGTH_INTERNAL]; int depth = 0; int maxFreq = -1; const uint8_t *const root = DICT_ROOT; @@ -828,7 +825,7 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t *const inWord return maxFreq; } -int UnigramDictionary::getFrequency(const int32_t *const inWord, const int length) const { +int UnigramDictionary::getFrequency(const int *const inWord, const int length) const { const uint8_t *const root = DICT_ROOT; int pos = BinaryFormat::getTerminalPosition(root, inWord, length, false /* forceLowerCaseSearch */); @@ -853,8 +850,7 @@ int UnigramDictionary::getFrequency(const int32_t *const inWord, const int lengt } // TODO: remove this function. -int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offset, - int length) const { +int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int length) const { return -1; } @@ -900,7 +896,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // else if FLAG_IS_TERMINAL: the frequency // else if MASK_GROUP_ADDRESS_TYPE is not NONE: the children address // Note that you can't have a node that both is not a terminal and has no children. - int32_t c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); + int c = BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos); assert(NOT_A_CODE_POINT != c); // We are going to loop through each character and make it look like it's a different @@ -914,7 +910,7 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, // We prefetch the next char. If 'c' is the last char of this node, we will have // NOT_A_CODE_POINT in the next char. From this we can decide whether this virtual node // should behave as a terminal or not and whether we have children. - const int32_t nextc = hasMultipleChars + const int nextc = hasMultipleChars ? BinaryFormat::getCodePointAndForwardPointer(DICT_ROOT, &pos) : NOT_A_CODE_POINT; const bool isLastChar = (NOT_A_CODE_POINT == nextc); // If there are more chars in this nodes, then this virtual node is not a terminal. diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index 244d78d8c..764900739 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -41,12 +41,12 @@ class UnigramDictionary { static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; UnigramDictionary(const uint8_t *const streamStart, int fullWordMultiplier, int maxWordLength, int maxWords, const unsigned int flags); - int getFrequency(const int32_t *const inWord, const int length) const; - int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; + int getFrequency(const int *const inWord, const int length) const; + int getBigramPosition(int pos, int *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map *bigramMap, const uint8_t *bigramFilter, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies, + const bool useFullEditDistance, int *outWords, int *frequencies, int *outputTypes) const; virtual ~UnigramDictionary(); @@ -93,9 +93,9 @@ class UnigramDictionary { int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) const; int getMostFrequentWordLike(const int startInputIndex, const int inputSize, - Correction *correction, unsigned short *word) const; - int getMostFrequentWordLikeInner(const uint16_t *const inWord, const int inputSize, - short unsigned int *outWord) const; + Correction *correction, int *word) const; + int getMostFrequentWordLikeInner(const int *const inWord, const int inputSize, + int *outWord) const; int getSubStringSuggestion( ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, Correction *correction, @@ -103,14 +103,13 @@ class UnigramDictionary { const bool hasAutoCorrectionCandidate, const int currentWordIndex, const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, - int *wordLengthArray, unsigned short *outputWord, int *outputWordLength) const; - void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, - const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputSize, - Correction *correction, WordsPriorityQueuePool *queuePool, + int *wordLengthArray, int *outputWord, int *outputWordLength) const; + void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const bool useFullEditDistance, + const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const int outputWordLength, int *freqArray, int *wordLengthArray, - unsigned short *outputWord) const; + int *outputWord) const; const uint8_t *const DICT_ROOT; const int MAX_WORD_LENGTH; diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index 19efa5da3..ac384dd69 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -30,15 +30,15 @@ class WordsPriorityQueue { class SuggestedWord { public: int mScore; - unsigned short mWord[MAX_WORD_LENGTH_INTERNAL]; + int mWord[MAX_WORD_LENGTH_INTERNAL]; int mWordLength; bool mUsed; int mType; - void setParams(int score, unsigned short *word, int wordLength, int type) { + void setParams(int score, int *word, int wordLength, int type) { mScore = score; mWordLength = wordLength; - memcpy(mWord, word, sizeof(unsigned short) * wordLength); + memcpy(mWord, word, sizeof(int) * wordLength); mUsed = true; mType = type; } @@ -57,9 +57,9 @@ class WordsPriorityQueue { delete[] mSuggestedWords; } - void push(int score, unsigned short *word, int wordLength, int type) { + void push(int score, int *word, int wordLength, int type) { SuggestedWord *sw = 0; - if (mSuggestions.size() >= MAX_WORDS) { + if (size() >= MAX_WORDS) { sw = mSuggestions.top(); const int minScore = sw->mScore; if (minScore >= score) { @@ -94,11 +94,10 @@ class WordsPriorityQueue { return sw; } - int outputSuggestions(const unsigned short *before, const int beforeLength, - int *frequencies, unsigned short *outputChars, int* outputTypes) { + int outputSuggestions(const int *before, const int beforeLength, int *frequencies, + int *outputCodePoints, int* outputTypes) { mHighestSuggestedWord = 0; - const unsigned int size = min( - MAX_WORDS, static_cast(mSuggestions.size())); + const int size = min(MAX_WORDS, static_cast(mSuggestions.size())); SuggestedWord *swBuffer[size]; int index = size - 1; while (!mSuggestions.empty() && index >= 0) { @@ -113,9 +112,9 @@ class WordsPriorityQueue { } if (size >= 2) { SuggestedWord *nsMaxSw = 0; - unsigned int maxIndex = 0; + int maxIndex = 0; float maxNs = 0; - for (unsigned int i = 0; i < size; ++i) { + for (int i = 0; i < size; ++i) { SuggestedWord *tempSw = swBuffer[i]; if (!tempSw) { continue; @@ -132,17 +131,17 @@ class WordsPriorityQueue { swBuffer[0] = nsMaxSw; } } - for (unsigned int i = 0; i < size; ++i) { + for (int i = 0; i < size; ++i) { SuggestedWord *sw = swBuffer[i]; if (!sw) { AKLOGE("SuggestedWord is null %d", i); continue; } - const unsigned int wordLength = sw->mWordLength; - unsigned short *targetAddress = outputChars + i * MAX_WORD_LENGTH; + const int wordLength = sw->mWordLength; + int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH; frequencies[i] = sw->mScore; outputTypes[i] = sw->mType; - memcpy(targetAddress, sw->mWord, wordLength * sizeof(unsigned short)); + memcpy(targetAddress, sw->mWord, wordLength * sizeof(int)); if (wordLength < MAX_WORD_LENGTH) { targetAddress[wordLength] = 0; } @@ -152,7 +151,7 @@ class WordsPriorityQueue { } int size() const { - return mSuggestions.size(); + return static_cast(mSuggestions.size()); } void clear() { @@ -175,13 +174,13 @@ class WordsPriorityQueue { DUMP_WORD(mHighestSuggestedWord->mWord, mHighestSuggestedWord->mWordLength); } - float getHighestNormalizedScore(const unsigned short *before, const int beforeLength, - unsigned short **outWord, int *outScore, int *outLength) { + float getHighestNormalizedScore(const int *before, const int beforeLength, int **outWord, + int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } - return getNormalizedScore( - mHighestSuggestedWord, before, beforeLength, outWord, outScore, outLength); + return getNormalizedScore(mHighestSuggestedWord, before, beforeLength, outWord, outScore, + outLength); } private: @@ -192,9 +191,8 @@ class WordsPriorityQueue { } }; - SuggestedWord *getFreeSuggestedWord(int score, unsigned short *word, - int wordLength, int type) { - for (unsigned int i = 0; i < MAX_WORD_LENGTH; ++i) { + SuggestedWord *getFreeSuggestedWord(int score, int *word, int wordLength, int type) { + for (int i = 0; i < MAX_WORD_LENGTH; ++i) { if (!mSuggestedWords[i].mUsed) { mSuggestedWords[i].setParams(score, word, wordLength, type); return &mSuggestedWords[i]; @@ -203,10 +201,10 @@ class WordsPriorityQueue { return 0; } - static float getNormalizedScore(SuggestedWord *sw, const unsigned short *before, - const int beforeLength, unsigned short **outWord, int *outScore, int *outLength) { + static float getNormalizedScore(SuggestedWord *sw, const int *before, const int beforeLength, + int **outWord, int *outScore, int *outLength) { const int score = sw->mScore; - unsigned short *word = sw->mWord; + int *word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; @@ -217,15 +215,15 @@ class WordsPriorityQueue { if (outLength) { *outLength = wordLength; } - return Correction::RankingAlgorithm::calcNormalizedScore( - before, beforeLength, word, wordLength, score); + return Correction::RankingAlgorithm::calcNormalizedScore(before, beforeLength, word, + wordLength, score); } typedef std::priority_queue, wordComparator> Suggestions; Suggestions mSuggestions; - const unsigned int MAX_WORDS; - const unsigned int MAX_WORD_LENGTH; + const int MAX_WORDS; + const int MAX_WORD_LENGTH; SuggestedWord *mSuggestedWords; SuggestedWord *mHighestSuggestedWord; };