diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 2ee4077c1..4cabc8404 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -52,6 +52,8 @@ class BinaryFormat { // Mask for attribute frequency, stored on 4 bits inside the flags byte. static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F; + // The numeric value of the shortcut frequency that means 'whitelist'. + static const int WHITELIST_SHORTCUT_FREQUENCY = 15; // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; @@ -99,6 +101,7 @@ class BinaryFormat { static bool hasChildrenInFlags(const uint8_t flags); static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); + static int getAttributeFrequencyFromFlags(const int flags); static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, const int length, const bool forceLowerCaseSearch); static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, @@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con } } +inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) { + return flags & MASK_ATTRIBUTE_FREQUENCY; +} + // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index e1f9db92d..e55da0113 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -157,6 +157,10 @@ void Correction::checkState() { } } +bool Correction::sameAsTyped() { + return mProximityInfoState.sameAsTyped(mWord, mOutputIndex); +} + int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 81623a46b..57e7b7189 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -105,6 +105,7 @@ class Correction { const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors); void checkState(); + bool sameAsTyped(); bool initProcessState(const int index); int getInputIndex(); diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 76d45516e..474c40757 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -160,6 +160,21 @@ class ProximityInfoState { return mTouchPositionCorrectionEnabled; } + inline bool sameAsTyped(const unsigned short *word, int length) const { + if (length != mInputLength) { + return false; + } + const int *inputCodes = mInputCodes; + while (length--) { + if (static_cast(*inputCodes) != static_cast(*word)) { + return false; + } + inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; + word++; + } + return true; + } + private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); ///////////////////////////////////////// @@ -179,21 +194,6 @@ class ProximityInfoState { return mInputXCoordinates && mInputYCoordinates; } - bool sameAsTyped(const unsigned short *word, int length) const { - if (length != mInputLength) { - return false; - } - const int *inputCodes = mInputCodes; - while (length--) { - if ((unsigned int) *inputCodes != (unsigned int) *word) { - return false; - } - inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; - word++; - } - return true; - } - // const const ProximityInfo *mProximityInfo; bool mHasTouchPositionCorrectionData; diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index d63364514..1ae9c7cbb 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -46,7 +46,7 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. - inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) { + inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); @@ -56,6 +56,7 @@ class TerminalAttributes { if (NOT_A_CHARACTER == charCode) break; outWord[i] = (uint16_t)charCode; } + *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; return i; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 9f7ab5362..cc6d39a29 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability, const int finalProbability = correction->getFinalProbability(probability, &wordPointer, &wordLength); if (finalProbability != NOT_A_PROBABILITY) { - addWord(wordPointer, wordLength, finalProbability, masterQueue, - Dictionary::KIND_CORRECTION); + if (0 != finalProbability) { + // If the probability is 0, we don't want to add this word. However we still + // want to add its shortcuts (including a possible whitelist entry) if any. + addWord(wordPointer, wordLength, finalProbability, masterQueue, + Dictionary::KIND_CORRECTION); + } const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. @@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability, // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutFrequency; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( - MAX_WORD_LENGTH_INTERNAL, shortcutTarget); - addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability, - masterQueue, Dictionary::KIND_CORRECTION); + MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); + int shortcutScore; + int kind; + if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY + && correction->sameAsTyped()) { + shortcutScore = S_INT_MAX; + kind = Dictionary::KIND_WHITELIST; + } else { + shortcutScore = shortcutProbability; + kind = Dictionary::KIND_CORRECTION; + } + addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore, + masterQueue, kind); } } }