diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
index 0a7509c8b..52b668936 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp
@@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
 // Mask for attribute probability, stored on 4 bits inside the flags byte.
 const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
 const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
+const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
+// The numeric value of the shortcut probability that means 'whitelist'.
+const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
 
 /* static */ int TaUtils::getBigramAddressAndForwardPointer(
         const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
index f38fd5aaa..15637d8a9 100644
--- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
+++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h
@@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
  public:
     typedef uint8_t TerminalAttributeFlags;
     typedef TerminalAttributeFlags BigramFlags;
+    typedef TerminalAttributeFlags ShortcutFlags;
 
     static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
             const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
@@ -59,6 +60,38 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
             const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
             int *const pos);
 
+    // Shortcuts reading methods
+    // This method returns the size of the shortcut list region excluding the shortcut list size
+    // field at the beginning.
+    static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+        // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
+        return ByteArrayUtils::readUint16andAdvancePosition(
+                binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
+    }
+
+    // Moves *pos from the shortcut list size field to just past the end of the shortcut list.
+    static AK_FORCE_INLINE void skipShortcuts(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
+        const int shortcutListSize = getShortcutListSizeAndForwardPointer(
+                binaryDictionaryInfo, pos);
+        *pos += shortcutListSize;
+    }
+
+    // Returns true when the probability stored in flags is the value reserved for whitelist.
+    static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
+        return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
+    }
+
+    // Reads one shortcut target into outWord, advancing *pos, and returns what
+    // ByteArrayUtils::readStringAndAdvancePosition() reports (callers use it as the length).
+    static AK_FORCE_INLINE int readShortcutTarget(
+            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
+            int *const outWord, int *const pos) {
+        return ByteArrayUtils::readStringAndAdvancePosition(
+                binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
 
@@ -70,6 +103,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
     static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
     static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
     static const int ATTRIBUTE_ADDRESS_SHIFT;
+    static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
+    static const int WHITELIST_SHORTCUT_PROBABILITY;
 
     static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
         return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h
index df0ec480d..9557d8ce7 100644
--- a/native/jni/src/suggest/core/dictionary/binary_format.h
+++ b/native/jni/src/suggest/core/dictionary/binary_format.h
@@ -52,14 +52,10 @@ class BinaryFormat {
 
     // Mask for attribute probability, stored on 4 bits inside the flags byte.
     static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
-    // The numeric value of the shortcut probability that means 'whitelist'.
-    static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
 
     // Mask and flags for attribute address type selection.
     static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
 
-    static const int SHORTCUT_LIST_SIZE_SIZE = 2;
-
     static bool hasBlacklistedOrNotAWordFlag(const int flags);
     static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
     static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -73,9 +69,6 @@ class BinaryFormat {
             const int pos);
     static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
     static bool hasChildrenInFlags(const uint8_t flags);
-    static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
-            int *pos);
-    static int getAttributeProbabilityFromFlags(const int flags);
     static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
             const int length, const bool forceLowerCaseSearch);
     static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
     return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
 }
 
-AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
-        const uint8_t flags, int *pos) {
-    int offset = 0;
-    const int origin = *pos;
-    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
-            offset = dict[origin];
-            *pos = origin + 1;
-            break;
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
-            offset = dict[origin] << 8;
-            offset += dict[origin + 1];
-            *pos = origin + 2;
-            break;
-        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
-            offset = dict[origin] << 16;
-            offset += dict[origin + 1] << 8;
-            offset += dict[origin + 2];
-            *pos = origin + 3;
-            break;
-    }
-    if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
-        return origin - offset;
-    } else {
-        return origin + offset;
-    }
-}
-
-inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
-    return flags & MASK_ATTRIBUTE_PROBABILITY;
-}
-
 // This function gets the byte position of the last chargroup of the exact matching word in the
 // dictionary. If no match is found, it returns NOT_VALID_WORD.
 AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
diff --git a/native/jni/src/suggest/core/dictionary/shortcut_utils.h b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
index 601ac5f5a..3c2180937 100644
--- a/native/jni/src/suggest/core/dictionary/shortcut_utils.h
+++ b/native/jni/src/suggest/core/dictionary/shortcut_utils.h
@@ -29,15 +29,16 @@ class ShortcutUtils {
             int outputWordIndex, const int finalScore, int *const outputCodePoints,
             int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
         TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
+        // Buffer for the target code points, shared by all iterations of the loop below.
+        int shortcutTarget[MAX_WORD_LENGTH];
         while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
-            int shortcutTarget[MAX_WORD_LENGTH];
-            int shortcutProbability;
-            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
-                    MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
+            bool isWhitelist;
+            int shortcutTargetStringLength;
+            iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
+                    &shortcutTargetStringLength, &isWhitelist);
             int shortcutScore;
             int kind;
-            if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
-                    && sameAsTyped) {
+            if (isWhitelist && sameAsTyped) {
                 shortcutScore = S_INT_MAX;
                 kind = Dictionary::KIND_WHITELIST;
             } else {
diff --git a/native/jni/src/suggest/core/dictionary/terminal_attributes.h b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
index bbd9af090..cec47081e 100644
--- a/native/jni/src/suggest/core/dictionary/terminal_attributes.h
+++ b/native/jni/src/suggest/core/dictionary/terminal_attributes.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "suggest/core/dictionary/binary_dictionary_info.h"
+#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
 #include "suggest/core/dictionary/binary_format.h"
 
 namespace latinime {
@@ -33,60 +34,75 @@ class TerminalAttributes {
  public:
     class ShortcutIterator {
      public:
-        ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
-                const uint8_t flags)
-                : mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos),
-                  mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
-        }
+        ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+                const int shortcutPos, const bool hasShortcutList)
+                : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
+                  mHasNextShortcutTarget(hasShortcutList) {}
 
         inline bool hasNextShortcutTarget() const {
             return mHasNextShortcutTarget;
         }
 
-        // Gets the shortcut target itself as an int string. For parameters and return value
-        // see BinaryFormat::getWordAtAddress.
-        inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
-            const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
-                    mBinaryDicitionaryInfo->getDictRoot(), &mPos);
-            mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
-            unsigned int i;
-            for (i = 0; i < MAX_WORD_LENGTH; ++i) {
-                const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
-                        mBinaryDicitionaryInfo->getDictRoot(), &mPos);
-                if (NOT_A_CODE_POINT == codePoint) break;
-                outWord[i] = codePoint;
+        // Reads the next shortcut entry and advances the iterator past it. The target code
+        // points are written to outTarget; maxDepth is forwarded to the reader as the maximum
+        // length. outTargetLength and outIsWhitelist are optional and may be null; when non-null
+        // they receive the target length and whether the entry is a whitelist entry.
+        AK_FORCE_INLINE void nextShortcutTarget(
+                const int maxDepth, int *const outTarget, int *const outTargetLength,
+                bool *const outIsWhitelist) {
+            const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
+                    BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
+                            mBinaryDictionaryInfo, &mPos);
+            mHasNextShortcutTarget =
+                    BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
+            if (outIsWhitelist) {
+                *outIsWhitelist =
+                        BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
+            }
+            // Always consume the target string, even when the caller does not want its length:
+            // mPos has to end up on the next entry's flags byte for the following call.
+            const int targetLength =
+                    BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
+                            mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
+            if (outTargetLength) {
+                *outTargetLength = targetLength;
             }
-            *outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
-            return i;
         }
 
      private:
-        const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
+        const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
         int mPos;
         bool mHasNextShortcutTarget;
     };
 
-    TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
-            const uint8_t flags, const int pos)
-            : mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) {
-    }
+    TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
+            const uint8_t nodeFlags, const int shortcutPos)
+            : mBinaryDictionaryInfo(binaryDictionaryInfo),
+              mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
 
     inline ShortcutIterator getShortcutIterator() const {
         // The size of the shortcuts is stored here so that the whole shortcut chunk can be
         // skipped quickly, so we ignore it.
-        return ShortcutIterator(
-                mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
+        const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
+        int shortcutPos = mShortcutListSizePos;
+        if (hasShortcutList) {
+            // Only read the size field when the node flags say a shortcut list exists; without
+            // one, mShortcutListSizePos is not guaranteed to point at valid list data.
+            BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
+                    mBinaryDictionaryInfo, &shortcutPos);
+        }
+        return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
     }
 
     bool isBlacklistedOrNotAWord() const {
-        return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
+        return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
     }
 
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
-    const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
-    const uint8_t mFlags;
-    const int mStartPos;
+    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
+    const uint8_t mNodeFlags;
+    const int mShortcutListSizePos;
 };
 } // namespace latinime
 #endif // LATINIME_TERMINAL_ATTRIBUTES_H