am c0d7a376
: Merge "Move shortcut reading methods."
* commit 'c0d7a376e2d34e998f3791ea8b1e90d408b8e2e3': Move shortcut reading methods.
This commit is contained in:
commit
ee86815fde
5 changed files with 77 additions and 75 deletions
|
@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
|
||||||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||||
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
|
const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
|
||||||
|
// The numeric value of the shortcut probability that means 'whitelist'.
|
||||||
|
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
|
||||||
|
|
||||||
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
|
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
|
||||||
|
|
|
@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
||||||
public:
|
public:
|
||||||
typedef uint8_t TerminalAttributeFlags;
|
typedef uint8_t TerminalAttributeFlags;
|
||||||
typedef TerminalAttributeFlags BigramFlags;
|
typedef TerminalAttributeFlags BigramFlags;
|
||||||
|
typedef TerminalAttributeFlags ShortcutFlags;
|
||||||
|
|
||||||
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
|
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||||
|
@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
|
||||||
int *const pos);
|
int *const pos);
|
||||||
|
|
||||||
|
// Shortcuts reading methods
|
||||||
|
// This method returns the size of the shortcut list region excluding the shortcut list size
|
||||||
|
// field at the beginning.
|
||||||
|
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||||
|
// readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
|
||||||
|
return ByteArrayUtils::readUint16andAdvancePosition(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE void skipShortcuts(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||||
|
const int shortcutListSize = getShortcutListSizeAndForwardPointer(
|
||||||
|
binaryDictionaryInfo, pos);
|
||||||
|
*pos += shortcutListSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
|
||||||
|
return getProbabilityFromFlags(flags) == WHITELIST_SHORTCUT_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE int readShortcutTarget(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
|
||||||
|
int *const outWord, int *const pos) {
|
||||||
|
return ByteArrayUtils::readStringAndAdvancePosition(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
|
||||||
|
|
||||||
|
@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
||||||
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
|
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
|
||||||
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
|
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
|
||||||
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
||||||
|
static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
|
||||||
|
static const int WHITELIST_SHORTCUT_PROBABILITY;
|
||||||
|
|
||||||
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
|
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
|
||||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||||
|
|
|
@ -52,14 +52,10 @@ class BinaryFormat {
|
||||||
|
|
||||||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||||
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
// The numeric value of the shortcut probability that means 'whitelist'.
|
|
||||||
static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
|
|
||||||
|
|
||||||
// Mask and flags for attribute address type selection.
|
// Mask and flags for attribute address type selection.
|
||||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||||
|
|
||||||
static const int SHORTCUT_LIST_SIZE_SIZE = 2;
|
|
||||||
|
|
||||||
static bool hasBlacklistedOrNotAWordFlag(const int flags);
|
static bool hasBlacklistedOrNotAWordFlag(const int flags);
|
||||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||||
|
@ -73,9 +69,6 @@ class BinaryFormat {
|
||||||
const int pos);
|
const int pos);
|
||||||
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
|
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
|
||||||
static bool hasChildrenInFlags(const uint8_t flags);
|
static bool hasChildrenInFlags(const uint8_t flags);
|
||||||
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
|
|
||||||
int *pos);
|
|
||||||
static int getAttributeProbabilityFromFlags(const int flags);
|
|
||||||
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch);
|
const int length, const bool forceLowerCaseSearch);
|
||||||
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
|
||||||
|
@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
|
||||||
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
|
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
|
|
||||||
const uint8_t flags, int *pos) {
|
|
||||||
int offset = 0;
|
|
||||||
const int origin = *pos;
|
|
||||||
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
|
||||||
offset = dict[origin];
|
|
||||||
*pos = origin + 1;
|
|
||||||
break;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
|
||||||
offset = dict[origin] << 8;
|
|
||||||
offset += dict[origin + 1];
|
|
||||||
*pos = origin + 2;
|
|
||||||
break;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
|
||||||
offset = dict[origin] << 16;
|
|
||||||
offset += dict[origin + 1] << 8;
|
|
||||||
offset += dict[origin + 2];
|
|
||||||
*pos = origin + 3;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags) {
|
|
||||||
return origin - offset;
|
|
||||||
} else {
|
|
||||||
return origin + offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
|
|
||||||
return flags & MASK_ATTRIBUTE_PROBABILITY;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function gets the byte position of the last chargroup of the exact matching word in the
|
// This function gets the byte position of the last chargroup of the exact matching word in the
|
||||||
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
// dictionary. If no match is found, it returns NOT_VALID_WORD.
|
||||||
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
|
||||||
|
|
|
@ -29,15 +29,15 @@ class ShortcutUtils {
|
||||||
int outputWordIndex, const int finalScore, int *const outputCodePoints,
|
int outputWordIndex, const int finalScore, int *const outputCodePoints,
|
||||||
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
|
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
|
||||||
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
|
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
|
||||||
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
|
|
||||||
int shortcutTarget[MAX_WORD_LENGTH];
|
int shortcutTarget[MAX_WORD_LENGTH];
|
||||||
int shortcutProbability;
|
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
|
||||||
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
|
bool isWhilelist;
|
||||||
MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
|
int shortcutTargetStringLength;
|
||||||
|
iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
|
||||||
|
&shortcutTargetStringLength, &isWhilelist);
|
||||||
int shortcutScore;
|
int shortcutScore;
|
||||||
int kind;
|
int kind;
|
||||||
if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
|
if (isWhilelist && sameAsTyped) {
|
||||||
&& sameAsTyped) {
|
|
||||||
shortcutScore = S_INT_MAX;
|
shortcutScore = S_INT_MAX;
|
||||||
kind = Dictionary::KIND_WHITELIST;
|
kind = Dictionary::KIND_WHITELIST;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
|
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||||
#include "suggest/core/dictionary/binary_format.h"
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -33,60 +34,66 @@ class TerminalAttributes {
|
||||||
public:
|
public:
|
||||||
class ShortcutIterator {
|
class ShortcutIterator {
|
||||||
public:
|
public:
|
||||||
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
|
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const uint8_t flags)
|
const int shortcutPos, const bool hasShortcutList)
|
||||||
: mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos),
|
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
|
||||||
mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
|
mHasNextShortcutTarget(hasShortcutList) {}
|
||||||
}
|
|
||||||
|
|
||||||
inline bool hasNextShortcutTarget() const {
|
inline bool hasNextShortcutTarget() const {
|
||||||
return mHasNextShortcutTarget;
|
return mHasNextShortcutTarget;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gets the shortcut target itself as an int string. For parameters and return value
|
// Gets the shortcut target itself as an int string and put it to outTarget, put its length
|
||||||
// see BinaryFormat::getWordAtAddress.
|
// to outTargetLength, put whether it is whitelist to outIsWhitelist.
|
||||||
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
|
AK_FORCE_INLINE void nextShortcutTarget(
|
||||||
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
|
const int maxDepth, int *const outTarget, int *const outTargetLength,
|
||||||
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
|
bool *const outIsWhitelist) {
|
||||||
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
|
const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
|
||||||
unsigned int i;
|
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
|
||||||
for (i = 0; i < MAX_WORD_LENGTH; ++i) {
|
mBinaryDictionaryInfo, &mPos);
|
||||||
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
mHasNextShortcutTarget =
|
||||||
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
|
BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
|
||||||
if (NOT_A_CODE_POINT == codePoint) break;
|
if (outIsWhitelist) {
|
||||||
outWord[i] = codePoint;
|
*outIsWhitelist =
|
||||||
|
BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
|
||||||
|
}
|
||||||
|
if (outTargetLength) {
|
||||||
|
*outTargetLength =
|
||||||
|
BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
|
||||||
|
mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
|
||||||
}
|
}
|
||||||
*outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
|
|
||||||
return i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
|
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||||
int mPos;
|
int mPos;
|
||||||
bool mHasNextShortcutTarget;
|
bool mHasNextShortcutTarget;
|
||||||
};
|
};
|
||||||
|
|
||||||
TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
|
TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const uint8_t flags, const int pos)
|
const uint8_t nodeFlags, const int shortcutPos)
|
||||||
: mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) {
|
: mBinaryDictionaryInfo(binaryDictionaryInfo),
|
||||||
}
|
mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
|
||||||
|
|
||||||
inline ShortcutIterator getShortcutIterator() const {
|
inline ShortcutIterator getShortcutIterator() const {
|
||||||
// The size of the shortcuts is stored here so that the whole shortcut chunk can be
|
// The size of the shortcuts is stored here so that the whole shortcut chunk can be
|
||||||
// skipped quickly, so we ignore it.
|
// skipped quickly, so we ignore it.
|
||||||
return ShortcutIterator(
|
int shortcutPos = mShortcutListSizePos;
|
||||||
mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
|
BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
|
||||||
|
mBinaryDictionaryInfo, &shortcutPos);
|
||||||
|
const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
|
||||||
|
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isBlacklistedOrNotAWord() const {
|
bool isBlacklistedOrNotAWord() const {
|
||||||
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
|
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
|
||||||
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
|
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||||
const uint8_t mFlags;
|
const uint8_t mNodeFlags;
|
||||||
const int mStartPos;
|
const int mShortcutListSizePos;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_TERMINAL_ATTRIBUTES_H
|
#endif // LATINIME_TERMINAL_ATTRIBUTES_H
|
||||||
|
|
Loading…
Reference in a new issue