am c0d7a376
: Merge "Move shortcut reading methods."
* commit 'c0d7a376e2d34e998f3791ea8b1e90d408b8e2e3': Move shortcut reading methods.
This commit is contained in:
commit
ee86815fde
5 changed files with 77 additions and 75 deletions
|
@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
|||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||
const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
|
||||
// The numeric value of the shortcut probability that means 'whitelist'.
|
||||
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
|
||||
|
||||
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
|
||||
|
|
|
@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
public:
|
||||
typedef uint8_t TerminalAttributeFlags;
|
||||
typedef TerminalAttributeFlags BigramFlags;
|
||||
typedef TerminalAttributeFlags ShortcutFlags;
|
||||
|
||||
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||
|
@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
|
||||
int *const pos);
|
||||
|
||||
// Shortcuts reading methods
|
||||
// This method returns the size of the shortcut list region excluding the shortcut list size
|
||||
// field at the beginning.
|
||||
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // The uint16 stored in the dictionary counts the size field itself as part of the
    // region, so the width of that field is subtracted to get the size of what follows it.
    // Going by its name, the reader also moves *pos past the field it has just read.
    const int sizeIncludingSizeField = ByteArrayUtils::readUint16andAdvancePosition(
            binaryDictionaryInfo->getDictRoot(), pos);
    return sizeIncludingSizeField - SHORTCUT_LIST_SIZE_FIELD_SIZE;
}
|
||||
|
||||
static AK_FORCE_INLINE void skipShortcuts(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // Step 1: read the size field through the same pointer that is moved in step 2.
    // This must stay a separate statement: the call has to complete before *pos is read
    // again, so it cannot be folded into a single compound assignment on *pos.
    const int bytesAfterSizeField =
            getShortcutListSizeAndForwardPointer(binaryDictionaryInfo, pos);
    // Step 2: move *pos forward by the size that was just read.
    *pos = *pos + bytesAfterSizeField;
}
|
||||
|
||||
static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
    // A shortcut is a whitelist entry exactly when the probability taken from its flags
    // equals WHITELIST_SHORTCUT_PROBABILITY.
    const bool hasWhitelistProbability =
            (WHITELIST_SHORTCUT_PROBABILITY == getProbabilityFromFlags(flags));
    return hasWhitelistProbability;
}
|
||||
|
||||
static AK_FORCE_INLINE int readShortcutTarget(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
        int *const outWord, int *const pos) {
    // Forwards to the generic string reader, using the dictionary root as the buffer and
    // the caller's *pos as the cursor; the reader's return value is passed back unchanged.
    const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
    const int readResult =
            ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
    return readResult;
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
|
||||
|
||||
|
@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
|
||||
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
|
||||
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
||||
static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
|
||||
static const int WHITELIST_SHORTCUT_PROBABILITY;
|
||||
|
||||
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
|
||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||
|
|
|
@ -52,14 +52,10 @@ class BinaryFormat {
|
|||
|
||||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||
// The numeric value of the shortcut probability that means 'whitelist'.
|
||||
static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
|
||||
|
||||
// Mask and flags for attribute address type selection.
|
||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||
|
||||
static const int SHORTCUT_LIST_SIZE_SIZE = 2;
|
||||
|
||||
static bool hasBlacklistedOrNotAWordFlag(const int flags);
|
||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
|
@ -73,9 +69,6 @@ class BinaryFormat {
|
|||
const int pos);
|
||||
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
|
||||
static bool hasChildrenInFlags(const uint8_t flags);
|
||||
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
||||
int *pos);
|
||||
static int getAttributeProbabilityFromFlags(const int flags);
|
||||
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch);
|
||||
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
||||
|
@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
|
|||
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
        const uint8_t flags, int *pos) {
    // Position of the first offset byte; the returned address is relative to this point.
    const int origin = *pos;

    // The address-type bits of the flags select how many bytes encode the offset.
    int offsetWidth = 0;
    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
            offsetWidth = 1;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
            offsetWidth = 2;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
            offsetWidth = 3;
            break;
        default:
            // No recognised address type: nothing is read, the offset stays 0 and *pos
            // keeps its value.
            break;
    }

    // Assemble the offset with the most significant byte first.
    int offset = 0;
    for (int i = 0; i < offsetWidth; ++i) {
        offset = (offset << 8) | dict[origin + i];
    }
    *pos = origin + offsetWidth;

    // The sign flag decides whether the offset points backward or forward from origin.
    const bool pointsBackward = (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) != 0;
    return pointsBackward ? (origin - offset) : (origin + offset);
}
|
||||
|
||||
inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
|
||||
return flags & MASK_ATTRIBUTE_PROBABILITY;
|
||||
}
|
||||
|
||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||
|
|
|
@ -29,15 +29,15 @@ class ShortcutUtils {
|
|||
int outputWordIndex, const int finalScore, int *const outputCodePoints,
|
||||
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
|
||||
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
|
||||
int shortcutTarget[MAX_WORD_LENGTH];
|
||||
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
|
||||
int shortcutTarget[MAX_WORD_LENGTH];
|
||||
int shortcutProbability;
|
||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
||||
MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
|
||||
bool isWhilelist;
|
||||
int shortcutTargetStringLength;
|
||||
iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
|
||||
&shortcutTargetStringLength, &isWhilelist);
|
||||
int shortcutScore;
|
||||
int kind;
|
||||
if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
|
||||
&& sameAsTyped) {
|
||||
if (isWhilelist && sameAsTyped) {
|
||||
shortcutScore = S_INT_MAX;
|
||||
kind = Dictionary::KIND_WHITELIST;
|
||||
} else {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
#include "suggest/core/dictionary/binary_format.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -33,60 +34,66 @@ class TerminalAttributes {
|
|||
public:
|
||||
class ShortcutIterator {
|
||||
public:
|
||||
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
|
||||
const uint8_t flags)
|
||||
: mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos),
|
||||
mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||
}
|
||||
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const int shortcutPos, const bool hasShortcutList)
|
||||
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
|
||||
mHasNextShortcutTarget(hasShortcutList) {}
|
||||
|
||||
inline bool hasNextShortcutTarget() const {
|
||||
return mHasNextShortcutTarget;
|
||||
}
|
||||
|
||||
// Gets the shortcut target itself as an int string. For parameters and return value
|
||||
// see BinaryFormat::getWordAtAddress.
|
||||
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
|
||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
|
||||
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
|
||||
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
||||
unsigned int i;
|
||||
for (i = 0; i < MAX_WORD_LENGTH; ++i) {
|
||||
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
||||
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
|
||||
if (NOT_A_CODE_POINT == codePoint) break;
|
||||
outWord[i] = codePoint;
|
||||
// Gets the shortcut target itself as an int string and puts it in outTarget, puts its length
|
||||
// in outTargetLength, and puts whether it is a whitelist entry in outIsWhitelist.
|
||||
AK_FORCE_INLINE void nextShortcutTarget(
|
||||
const int maxDepth, int *const outTarget, int *const outTargetLength,
|
||||
bool *const outIsWhitelist) {
|
||||
const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
|
||||
mBinaryDictionaryInfo, &mPos);
|
||||
mHasNextShortcutTarget =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
|
||||
if (outIsWhitelist) {
|
||||
*outIsWhitelist =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
|
||||
}
|
||||
if (outTargetLength) {
|
||||
*outTargetLength =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
|
||||
mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
|
||||
}
|
||||
*outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
|
||||
return i;
|
||||
}
|
||||
|
||||
private:
|
||||
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
int mPos;
|
||||
bool mHasNextShortcutTarget;
|
||||
};
|
||||
|
||||
TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
|
||||
const uint8_t flags, const int pos)
|
||||
: mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) {
|
||||
}
|
||||
TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const uint8_t nodeFlags, const int shortcutPos)
|
||||
: mBinaryDictionaryInfo(binaryDictionaryInfo),
|
||||
mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
|
||||
|
||||
inline ShortcutIterator getShortcutIterator() const {
|
||||
// The size of the shortcuts is stored here so that the whole shortcut chunk can be
|
||||
// skipped quickly, so we ignore it.
|
||||
return ShortcutIterator(
|
||||
mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
|
||||
int shortcutPos = mShortcutListSizePos;
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
|
||||
mBinaryDictionaryInfo, &shortcutPos);
|
||||
const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
|
||||
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
|
||||
}
|
||||
|
||||
bool isBlacklistedOrNotAWord() const {
|
||||
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
|
||||
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
|
||||
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
|
||||
const uint8_t mFlags;
|
||||
const int mStartPos;
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
const uint8_t mNodeFlags;
|
||||
const int mShortcutListSizePos;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_TERMINAL_ATTRIBUTES_H
|
||||
|
|
Loading…
Reference in a new issue