diff --git a/native/jni/Android.mk b/native/jni/Android.mk index bd769d2fc..69a9aa970 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -50,6 +50,7 @@ LATIN_IME_CORE_SRC_FILES := \ proximity_info.cpp \ proximity_info_state.cpp \ unigram_dictionary.cpp \ + words_priority_queue.cpp \ gesture/gesture_decoder_wrapper.cpp \ gesture/incremental_decoder_wrapper.cpp diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index 949158a0b..d7b67c98b 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -625,6 +625,29 @@ Correction::CorrectionType Correction::processCharAndCalcState(const int c, cons } } +/* static */ int Correction::powerIntCapped(const int base, const int n) { + if (n <= 0) return 1; + if (base == 2) { + return n < 31 ? 1 << n : S_INT_MAX; + } else { + int ret = base; + for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); + return ret; + } +} + +/* static */ void Correction::multiplyRate(const int rate, int *freq) { + if (*freq != S_INT_MAX) { + if (*freq > 1000000) { + *freq /= 100; + multiplyIntCapped(rate, freq); + } else { + multiplyIntCapped(rate, freq); + *freq /= 100; + } + } +} + inline static int getQuoteCount(const int *word, const int length) { int quoteCount = 0; for (int i = 0; i < length; ++i) { diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index fff24b048..d0b196cf2 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -168,28 +168,8 @@ class Correction { } } - inline static int powerIntCapped(const int base, const int n) { - if (n <= 0) return 1; - if (base == 2) { - return n < 31 ? 1 << n : S_INT_MAX; - } else { - int ret = base; - for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret); - return ret; - } - } - - inline static void multiplyRate(const int rate, int *freq) { - if (*freq != S_INT_MAX) { - if (*freq > 1000000) { - *freq /= 100; - multiplyIntCapped(rate, freq); - } else { - multiplyIntCapped(rate, freq); - *freq /= 100; - } - } - } + static int powerIntCapped(const int base, const int n); + static void multiplyRate(const int rate, int *freq); inline int getSpaceProximityPos() const { return mSpaceProximityPos; @@ -214,8 +194,6 @@ class Correction { inline void incrementOutputIndex(); inline void startToTraverseAllNodes(); inline bool isSingleQuote(const int c); - inline CorrectionType processSkipChar(const int c, const bool isTerminal, - const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int c); inline int getFinalProbabilityInternal(const int probability, int **word, int *wordLength, @@ -246,6 +224,9 @@ class Correction { // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot. int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)]; + CorrectionType processSkipChar(const int c, const bool isTerminal, + const bool inputIndexIncremented); + CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL]; // The following member variables are being used as cache values of the correction state. diff --git a/native/jni/src/dic_traverse_wrapper.h b/native/jni/src/dic_traverse_wrapper.h index 292382487..3fe3d5b74 100644 --- a/native/jni/src/dic_traverse_wrapper.h +++ b/native/jni/src/dic_traverse_wrapper.h @@ -62,6 +62,5 @@ class DicTraverseWrapper { void *, const Dictionary *const, const int *, const int); static void (*sDicTraverseSessionReleaseMethod)(void *); }; -int register_DicTraverseSession(JNIEnv *env); } // namespace latinime #endif // LATINIME_DIC_TRAVERSE_WRAPPER_H diff --git a/native/jni/src/proximity_info_state.cpp b/native/jni/src/proximity_info_state.cpp index 987a27b80..db79bb616 100644 --- a/native/jni/src/proximity_info_state.cpp +++ b/native/jni/src/proximity_info_state.cpp @@ -560,6 +560,68 @@ float ProximityInfoState::getPointToKeyByIdLength(const int inputIndex, const in return static_cast(MAX_POINT_TO_KEY_LENGTH); } +// In the following function, c is the current character of the dictionary word currently examined. +// currentChars is an array containing the keys close to the character the user actually typed at +// the same position. We want to see if c is in it: if so, then the word contains at that position +// a character close to what the user typed. +// What the user typed is actually the first character of the array. +// proximityIndex is a pointer to the variable where getMatchedProximityId returns the index of c +// in the proximity chars of the input index. +// Notice : accented characters do not have a proximity list, so they are alone in their list. The +// non-accented version of the character should be considered "close", but not the other keys close +// to the non-accented version. +ProximityType ProximityInfoState::getMatchedProximityId(const int index, const int c, + const bool checkProximityChars, int *proximityIndex) const { + const int *currentCodePoints = getProximityCodePointsAt(index); + const int firstCodePoint = currentCodePoints[0]; + const int baseLowerC = toBaseLowerCase(c); + + // The first char in the array is what user typed. If it matches right away, that means the + // user typed that same char for this pos. + if (firstCodePoint == baseLowerC || firstCodePoint == c) { + return EQUIVALENT_CHAR; + } + + if (!checkProximityChars) return UNRELATED_CHAR; + + // If the non-accented, lowercased version of that first character matches c, then we have a + // non-accented version of the accented character the user typed. Treat it as a close char. + if (toBaseLowerCase(firstCodePoint) == baseLowerC) { + return NEAR_PROXIMITY_CHAR; + } + + // Not an exact nor an accent-alike match: search the list of close keys + int j = 1; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return NEAR_PROXIMITY_CHAR; + } + ++j; + } + if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + ++j; + while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL + && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { + const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); + if (matched) { + if (proximityIndex) { + *proximityIndex = j; + } + return ADDITIONAL_PROXIMITY_CHAR; + } + ++j; + } + } + // Was not included, signal this as an unrelated character. + return UNRELATED_CHAR; +} + int ProximityInfoState::getSpaceY() const { const int keyId = mProximityInfo->getKeyIndexOf(KEYCODE_SPACE); return mProximityInfo->getKeyCenterYOfKeyIdG(keyId); diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 14fe2d3f5..9b859606d 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -94,72 +94,6 @@ class ProximityInfoState { return false; } - // In the following function, c is the current character of the dictionary word - // currently examined. - // currentChars is an array containing the keys close to the character the - // user actually typed at the same position. We want to see if c is in it: if so, - // then the word contains at that position a character close to what the user - // typed. - // What the user typed is actually the first character of the array. - // proximityIndex is a pointer to the variable where getMatchedProximityId returns - // the index of c in the proximity chars of the input index. - // Notice : accented characters do not have a proximity list, so they are alone - // in their list. The non-accented version of the character should be considered - // "close", but not the other keys close to the non-accented version. - inline ProximityType getMatchedProximityId(const int index, const int c, - const bool checkProximityChars, int *proximityIndex = 0) const { - const int *currentCodePoints = getProximityCodePointsAt(index); - const int firstCodePoint = currentCodePoints[0]; - const int baseLowerC = toBaseLowerCase(c); - - // The first char in the array is what user typed. If it matches right away, - // that means the user typed that same char for this pos. - if (firstCodePoint == baseLowerC || firstCodePoint == c) { - return EQUIVALENT_CHAR; - } - - if (!checkProximityChars) return UNRELATED_CHAR; - - // If the non-accented, lowercased version of that first character matches c, - // then we have a non-accented version of the accented character the user - // typed. Treat it as a close char. - if (toBaseLowerCase(firstCodePoint) == baseLowerC) - return NEAR_PROXIMITY_CHAR; - - // Not an exact nor an accent-alike match: search the list of close keys - int j = 1; - while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return NEAR_PROXIMITY_CHAR; - } - ++j; - } - if (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentCodePoints[j] == ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - ++j; - while (j < MAX_PROXIMITY_CHARS_SIZE_INTERNAL - && currentCodePoints[j] > ADDITIONAL_PROXIMITY_CHAR_DELIMITER_CODE) { - const bool matched = - (currentCodePoints[j] == baseLowerC || currentCodePoints[j] == c); - if (matched) { - if (proximityIndex) { - *proximityIndex = j; - } - return ADDITIONAL_PROXIMITY_CHAR; - } - ++j; - } - } - - // Was not included, signal this as an unrelated character. - return UNRELATED_CHAR; - } - inline int getNormalizedSquaredDistance( const int inputIndex, const int proximityIndex) const { return mNormalizedSquaredDistances[ @@ -218,6 +152,9 @@ class ProximityInfoState { float getPointToKeyLength(const int inputIndex, const int charCode) const; float getPointToKeyByIdLength(const int inputIndex, const int keyId) const; + ProximityType getMatchedProximityId(const int index, const int c, + const bool checkProximityChars, int *proximityIndex = 0) const; + int getSpaceY() const; int32_t getAllPossibleChars( diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 6cde06bfa..820f9ab12 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -59,8 +59,7 @@ static inline int getCodesBufferSize(const int *codes, const int codesSize) { } // TODO: This needs to take a const int* and not tinker with its contents -static inline void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, - int type) { +static void addWord(int *word, int length, int frequency, WordsPriorityQueue *queue, int type) { queue->push(frequency, word, length, type); } @@ -690,10 +689,9 @@ void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximit // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous // interface. -inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, - const int inputSize, Correction *correction, int *word) const { +int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex, const int inputSize, + Correction *correction, int *word) const { int inWord[inputSize]; - for (int i = 0; i < inputSize; ++i) { inWord[i] = correction->getPrimaryCodePointAt(startInputIndex + i); } @@ -869,7 +867,7 @@ int UnigramDictionary::getBigramPosition(int pos, int *word, int offset, int len // there aren't any more nodes at this level, it merely returns the address of the first byte after // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. -inline bool UnigramDictionary::processCurrentNode(const int initialPos, +bool UnigramDictionary::processCurrentNode(const int initialPos, const std::map *bigramMap, const uint8_t *bigramFilter, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) const { diff --git a/native/jni/src/words_priority_queue.cpp b/native/jni/src/words_priority_queue.cpp new file mode 100644 index 000000000..7e18d0f87 --- /dev/null +++ b/native/jni/src/words_priority_queue.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2012, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "words_priority_queue.h" + +namespace latinime { + +int WordsPriorityQueue::outputSuggestions(const int *before, const int beforeLength, + int *frequencies, int *outputCodePoints, int* outputTypes) { + mHighestSuggestedWord = 0; + const int size = min(MAX_WORDS, static_cast(mSuggestions.size())); + SuggestedWord *swBuffer[size]; + int index = size - 1; + while (!mSuggestions.empty() && index >= 0) { + SuggestedWord *sw = mSuggestions.top(); + if (DEBUG_WORDS_PRIORITY_QUEUE) { + AKLOGI("dump word. %d", sw->mScore); + DUMP_WORD(sw->mWord, sw->mWordLength); + } + swBuffer[index] = sw; + mSuggestions.pop(); + --index; + } + if (size >= 2) { + SuggestedWord *nsMaxSw = 0; + int maxIndex = 0; + float maxNs = 0; + for (int i = 0; i < size; ++i) { + SuggestedWord *tempSw = swBuffer[i]; + if (!tempSw) { + continue; + } + const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0); + if (tempNs >= maxNs) { + maxNs = tempNs; + maxIndex = i; + nsMaxSw = tempSw; + } + } + if (maxIndex > 0 && nsMaxSw) { + memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0])); + swBuffer[0] = nsMaxSw; + } + } + for (int i = 0; i < size; ++i) { + SuggestedWord *sw = swBuffer[i]; + if (!sw) { + AKLOGE("SuggestedWord is null %d", i); + continue; + } + const int wordLength = sw->mWordLength; + int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH; + frequencies[i] = sw->mScore; + outputTypes[i] = sw->mType; + memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0])); + if (wordLength < MAX_WORD_LENGTH) { + targetAddress[wordLength] = 0; + } + sw->mUsed = false; + } + return size; +} +} // namespace latinime diff --git a/native/jni/src/words_priority_queue.h b/native/jni/src/words_priority_queue.h index c4ee07f32..b0cc92e2f 100644 --- a/native/jni/src/words_priority_queue.h +++ b/native/jni/src/words_priority_queue.h @@ -94,62 +94,6 @@ class WordsPriorityQueue { return sw; } - int outputSuggestions(const int *before, const int beforeLength, int *frequencies, - int *outputCodePoints, int* outputTypes) { - mHighestSuggestedWord = 0; - const int size = min(MAX_WORDS, static_cast(mSuggestions.size())); - SuggestedWord *swBuffer[size]; - int index = size - 1; - while (!mSuggestions.empty() && index >= 0) { - SuggestedWord *sw = mSuggestions.top(); - if (DEBUG_WORDS_PRIORITY_QUEUE) { - AKLOGI("dump word. %d", sw->mScore); - DUMP_WORD(sw->mWord, sw->mWordLength); - } - swBuffer[index] = sw; - mSuggestions.pop(); - --index; - } - if (size >= 2) { - SuggestedWord *nsMaxSw = 0; - int maxIndex = 0; - float maxNs = 0; - for (int i = 0; i < size; ++i) { - SuggestedWord *tempSw = swBuffer[i]; - if (!tempSw) { - continue; - } - const float tempNs = getNormalizedScore(tempSw, before, beforeLength, 0, 0, 0); - if (tempNs >= maxNs) { - maxNs = tempNs; - maxIndex = i; - nsMaxSw = tempSw; - } - } - if (maxIndex > 0 && nsMaxSw) { - memmove(&swBuffer[1], &swBuffer[0], maxIndex * sizeof(swBuffer[0])); - swBuffer[0] = nsMaxSw; - } - } - for (int i = 0; i < size; ++i) { - SuggestedWord *sw = swBuffer[i]; - if (!sw) { - AKLOGE("SuggestedWord is null %d", i); - continue; - } - const int wordLength = sw->mWordLength; - int *targetAddress = outputCodePoints + i * MAX_WORD_LENGTH; - frequencies[i] = sw->mScore; - outputTypes[i] = sw->mType; - memcpy(targetAddress, sw->mWord, wordLength * sizeof(targetAddress[0])); - if (wordLength < MAX_WORD_LENGTH) { - targetAddress[wordLength] = 0; - } - sw->mUsed = false; - } - return size; - } - int size() const { return static_cast(mSuggestions.size()); } @@ -183,6 +127,9 @@ class WordsPriorityQueue { outLength); } + int outputSuggestions(const int *before, const int beforeLength, int *frequencies, + int *outputCodePoints, int* outputTypes); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(WordsPriorityQueue); struct wordComparator {