From b14fc88e482e53ba6852c8d5da5d9826c68d041f Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Fri, 10 Aug 2012 16:52:27 +0900 Subject: [PATCH] Tag the whitelisted entries in native code. Since this is already used in Java land, this actually does activate the whitelist path, and the code is now fully functional. We still have to remove the old whitelist resource and to compile the dictionary that includes the whitelist. Bug: 6906525 Change-Id: Iacde5313e303b9ed792940efaf6bcfa4ee1317bd --- native/jni/src/binary_format.h | 7 +++++++ native/jni/src/correction.cpp | 4 ++++ native/jni/src/correction.h | 1 + native/jni/src/proximity_info_state.h | 30 +++++++++++++-------------- native/jni/src/terminal_attributes.h | 3 ++- native/jni/src/unigram_dictionary.cpp | 25 +++++++++++++++++----- 6 files changed, 49 insertions(+), 21 deletions(-) diff --git a/native/jni/src/binary_format.h b/native/jni/src/binary_format.h index 2ee4077c1..4cabc8404 100644 --- a/native/jni/src/binary_format.h +++ b/native/jni/src/binary_format.h @@ -52,6 +52,8 @@ class BinaryFormat { // Mask for attribute frequency, stored on 4 bits inside the flags byte. static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F; + // The numeric value of the shortcut frequency that means 'whitelist'. + static const int WHITELIST_SHORTCUT_FREQUENCY = 15; // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; @@ -99,6 +101,7 @@ class BinaryFormat { static bool hasChildrenInFlags(const uint8_t flags); static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags, int *pos); + static int getAttributeFrequencyFromFlags(const int flags); static int getTerminalPosition(const uint8_t *const root, const int32_t *const inWord, const int length, const bool forceLowerCaseSearch); static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, @@ -340,6 +343,10 @@ inline int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *con } } +inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) { + return flags & MASK_ATTRIBUTE_FREQUENCY; +} + // This function gets the byte position of the last chargroup of the exact matching word in the // dictionary. If no match is found, it returns NOT_VALID_WORD. inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index b18b35e8c..c815dabe6 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -157,6 +157,10 @@ void Correction::checkState() { } } +bool Correction::sameAsTyped() { + return mProximityInfoState.sameAsTyped(mWord, mOutputIndex); +} + int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 81623a46b..57e7b7189 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -105,6 +105,7 @@ class Correction { const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance, const bool doAutoCompletion, const int maxErrors); void checkState(); + bool sameAsTyped(); bool initProcessState(const int index); int getInputIndex(); diff --git a/native/jni/src/proximity_info_state.h b/native/jni/src/proximity_info_state.h index 76d45516e..474c40757 100644 --- a/native/jni/src/proximity_info_state.h +++ b/native/jni/src/proximity_info_state.h @@ -160,6 +160,21 @@ class ProximityInfoState { return mTouchPositionCorrectionEnabled; } + inline bool sameAsTyped(const unsigned short *word, int length) const { + if (length != mInputLength) { + return false; + } + const int *inputCodes = mInputCodes; + while (length--) { + if (static_cast(*inputCodes) != static_cast(*word)) { + return false; + } + inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; + word++; + } + return true; + } + private: DISALLOW_COPY_AND_ASSIGN(ProximityInfoState); ///////////////////////////////////////// @@ -179,21 +194,6 @@ class ProximityInfoState { return mInputXCoordinates && mInputYCoordinates; } - bool sameAsTyped(const unsigned short *word, int length) const { - if (length != mInputLength) { - return false; - } - const int *inputCodes = mInputCodes; - while (length--) { - if ((unsigned int) *inputCodes != (unsigned int) *word) { - return false; - } - inputCodes += MAX_PROXIMITY_CHARS_SIZE_INTERNAL; - word++; - } - return true; - } - // const const ProximityInfo *mProximityInfo; bool mHasTouchPositionCorrectionData; diff --git a/native/jni/src/terminal_attributes.h b/native/jni/src/terminal_attributes.h index d63364514..1ae9c7cbb 100644 --- a/native/jni/src/terminal_attributes.h +++ b/native/jni/src/terminal_attributes.h @@ -46,7 +46,7 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. - inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord) { + inline int getNextShortcutTarget(const int maxDepth, uint16_t *outWord, int *outFreq) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); @@ -56,6 +56,7 @@ class TerminalAttributes { if (NOT_A_CHARACTER == charCode) break; outWord[i] = (uint16_t)charCode; } + *outFreq = BinaryFormat::getAttributeFrequencyFromFlags(shortcutFlags); mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; return i; } diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 9f7ab5362..cc6d39a29 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -391,8 +391,12 @@ inline void UnigramDictionary::onTerminal(const int probability, const int finalProbability = correction->getFinalProbability(probability, &wordPointer, &wordLength); if (finalProbability != NOT_A_PROBABILITY) { - addWord(wordPointer, wordLength, finalProbability, masterQueue, - Dictionary::KIND_CORRECTION); + if (0 != finalProbability) { + // If the probability is 0, we don't want to add this word. However we still + // want to add its shortcuts (including a possible whitelist entry) if any. + addWord(wordPointer, wordLength, finalProbability, masterQueue, + Dictionary::KIND_CORRECTION); + } const int shortcutProbability = finalProbability > 0 ? finalProbability - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. @@ -407,10 +411,21 @@ inline void UnigramDictionary::onTerminal(const int probability, // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; + int shortcutFrequency; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( - MAX_WORD_LENGTH_INTERNAL, shortcutTarget); - addWord(shortcutTarget, shortcutTargetStringLength, shortcutProbability, - masterQueue, Dictionary::KIND_CORRECTION); + MAX_WORD_LENGTH_INTERNAL, shortcutTarget, &shortcutFrequency); + int shortcutScore; + int kind; + if (shortcutFrequency == BinaryFormat::WHITELIST_SHORTCUT_FREQUENCY + && correction->sameAsTyped()) { + shortcutScore = S_INT_MAX; + kind = Dictionary::KIND_WHITELIST; + } else { + shortcutScore = shortcutProbability; + kind = Dictionary::KIND_CORRECTION; + } + addWord(shortcutTarget, shortcutTargetStringLength, shortcutScore, + masterQueue, kind); } } }