Move shortcut reading methods.

Moved from BinaryFormat to BinaryDictionaryTerminalAttributesReadingUtils.

Bug: 6669677

Change-Id: Ia1ab25854effbf61df37837fe26755ac7dc4d020
main
Keisuke Kuroynagi 2013-06-26 14:41:22 +09:00
parent 4da287d0d1
commit 5ac44bdc2a
5 changed files with 77 additions and 75 deletions

View File

@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte. // Mask for attribute probability, stored on 4 bits inside the flags byte.
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
// Size in bytes of the uint16 size field stored at the beginning of a shortcut list.
const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
// The numeric value of the shortcut probability that means 'whitelist'.
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
/* static */ int TaUtils::getBigramAddressAndForwardPointer( /* static */ int TaUtils::getBigramAddressAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,

View File

@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
public: public:
typedef uint8_t TerminalAttributeFlags; typedef uint8_t TerminalAttributeFlags;
typedef TerminalAttributeFlags BigramFlags; typedef TerminalAttributeFlags BigramFlags;
typedef TerminalAttributeFlags ShortcutFlags;
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
int *const pos); int *const pos);
// Shortcuts reading methods
// Reads the shortcut list size field at *pos through
// ByteArrayUtils::readUint16andAdvancePosition() and returns the size of the shortcut list
// region that follows it. The size field at the beginning is not counted in the result.
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // The value stored in the uint16 field counts the field itself as well, so the field
    // size is taken off before returning.
    const int sizeIncludingSizeField = ByteArrayUtils::readUint16andAdvancePosition(
            binaryDictionaryInfo->getDictRoot(), pos);
    return sizeIncludingSizeField - SHORTCUT_LIST_SIZE_FIELD_SIZE;
}
// Moves *pos over a whole shortcut list: the size field is consumed first, then *pos is
// advanced by the size of the list region reported by that field.
static AK_FORCE_INLINE void skipShortcuts(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // Reading the size already modifies *pos, so the size is fetched into a local before
    // *pos is touched again.
    const int bytesToSkip = getShortcutListSizeAndForwardPointer(binaryDictionaryInfo, pos);
    *pos = *pos + bytesToSkip;
}
// Tells whether the probability carried by the given shortcut flags equals
// WHITELIST_SHORTCUT_PROBABILITY, i.e. whether the shortcut is a whitelist entry.
static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
    const int probability = getProbabilityFromFlags(flags);
    return WHITELIST_SHORTCUT_PROBABILITY == probability;
}
// Reads the shortcut target found at *pos into outWord by delegating to
// ByteArrayUtils::readStringAndAdvancePosition() with the dictionary root, maxLength,
// outWord and pos, and returns whatever that call returns (callers store it as the
// target length).
static AK_FORCE_INLINE int readShortcutTarget(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
        int *const outWord, int *const pos) {
    const int targetLength = ByteArrayUtils::readStringAndAdvancePosition(
            binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
    return targetLength;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT; static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY; static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT; static const int ATTRIBUTE_ADDRESS_SHIFT;
static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
static const int WHITELIST_SHORTCUT_PROBABILITY;
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) { static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;

View File

@ -52,14 +52,10 @@ class BinaryFormat {
// Mask for attribute probability, stored on 4 bits inside the flags byte. // Mask for attribute probability, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F; static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
// The numeric value of the shortcut probability that means 'whitelist'.
static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
// Mask and flags for attribute address type selection. // Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
static const int SHORTCUT_LIST_SIZE_SIZE = 2;
static bool hasBlacklistedOrNotAWordFlag(const int flags); static bool hasBlacklistedOrNotAWordFlag(const int flags);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
@ -73,9 +69,6 @@ class BinaryFormat {
const int pos); const int pos);
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos); static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
static bool hasChildrenInFlags(const uint8_t flags); static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeProbabilityFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int *const inWord, static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch); const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth, static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags)); return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
} }
// Decodes the attribute address stored at *pos. The address-type bits of flags select an
// offset of one, two or three bytes, most significant byte first, and *pos is moved past
// those bytes. The result is the original position plus the offset, or minus the offset when
// FLAG_ATTRIBUTE_OFFSET_NEGATIVE is set in flags. For any other address type nothing is
// read, *pos is left untouched and the original position is returned.
AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
        const uint8_t flags, int *pos) {
    const int origin = *pos;

    // Number of bytes used by the stored offset; stays 0 for an unrecognized address type.
    int byteCount = 0;
    switch (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
            byteCount = 1;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
            byteCount = 2;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
            byteCount = 3;
            break;
    }

    int offset = 0;
    if (byteCount > 0) {
        // Accumulate the bytes from most significant to least significant.
        for (int i = 0; i < byteCount; ++i) {
            offset = (offset << 8) + dict[origin + i];
        }
        *pos = origin + byteCount;
    }

    const bool isOffsetNegative = 0 != (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE);
    return isOffsetNegative ? origin - offset : origin + offset;
}
// Extracts the attribute probability from flags by keeping only the bits selected by
// MASK_ATTRIBUTE_PROBABILITY.
inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
    const int probability = MASK_ATTRIBUTE_PROBABILITY & flags;
    return probability;
}
// This function gets the byte position of the last chargroup of the exact matching word in the // This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD. // dictionary. If no match is found, it returns NOT_VALID_WORD.
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root, AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,

View File

@ -29,15 +29,15 @@ class ShortcutUtils {
int outputWordIndex, const int finalScore, int *const outputCodePoints, int outputWordIndex, const int finalScore, int *const outputCodePoints,
int *const frequencies, int *const outputTypes, const bool sameAsTyped) { int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
int shortcutTarget[MAX_WORD_LENGTH];
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) { while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
int shortcutTarget[MAX_WORD_LENGTH]; bool isWhilelist;
int shortcutProbability; int shortcutTargetStringLength;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget( iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability); &shortcutTargetStringLength, &isWhilelist);
int shortcutScore; int shortcutScore;
int kind; int kind;
if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY if (isWhilelist && sameAsTyped) {
&& sameAsTyped) {
shortcutScore = S_INT_MAX; shortcutScore = S_INT_MAX;
kind = Dictionary::KIND_WHITELIST; kind = Dictionary::KIND_WHITELIST;
} else { } else {

View File

@ -20,6 +20,7 @@
#include <stdint.h> #include <stdint.h>
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/binary_format.h"
namespace latinime { namespace latinime {
@ -33,60 +34,66 @@ class TerminalAttributes {
public: public:
class ShortcutIterator { class ShortcutIterator {
public: public:
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos, ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const uint8_t flags) const int shortcutPos, const bool hasShortcutList)
: mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos), : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) { mHasNextShortcutTarget(hasShortcutList) {}
}
inline bool hasNextShortcutTarget() const { inline bool hasNextShortcutTarget() const {
return mHasNextShortcutTarget; return mHasNextShortcutTarget;
} }
// Gets the shortcut target itself as an int string. For parameters and return value // Gets the shortcut target itself as an int string and put it to outTarget, put its length
// see BinaryFormat::getWordAtAddress. // to outTargetLength, put whether it is whitelist to outIsWhitelist.
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) { AK_FORCE_INLINE void nextShortcutTarget(
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer( const int maxDepth, int *const outTarget, int *const outTargetLength,
mBinaryDicitionaryInfo->getDictRoot(), &mPos); bool *const outIsWhitelist) {
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT); const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
unsigned int i; BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
for (i = 0; i < MAX_WORD_LENGTH; ++i) { mBinaryDictionaryInfo, &mPos);
const int codePoint = BinaryFormat::getCodePointAndForwardPointer( mHasNextShortcutTarget =
mBinaryDicitionaryInfo->getDictRoot(), &mPos); BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
if (NOT_A_CODE_POINT == codePoint) break; if (outIsWhitelist) {
outWord[i] = codePoint; *outIsWhitelist =
BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
}
if (outTargetLength) {
*outTargetLength =
BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
} }
*outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
return i;
} }
private: private:
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo; const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int mPos; int mPos;
bool mHasNextShortcutTarget; bool mHasNextShortcutTarget;
}; };
TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo, TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const uint8_t flags, const int pos) const uint8_t nodeFlags, const int shortcutPos)
: mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) { : mBinaryDictionaryInfo(binaryDictionaryInfo),
} mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}
inline ShortcutIterator getShortcutIterator() const { inline ShortcutIterator getShortcutIterator() const {
// The size of the shortcuts is stored here so that the whole shortcut chunk can be // The size of the shortcuts is stored here so that the whole shortcut chunk can be
// skipped quickly, so we ignore it. // skipped quickly, so we ignore it.
return ShortcutIterator( int shortcutPos = mShortcutListSizePos;
mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
mBinaryDictionaryInfo, &shortcutPos);
const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
} }
bool isBlacklistedOrNotAWord() const { bool isBlacklistedOrNotAWord() const {
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags); return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes); DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo; const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const uint8_t mFlags; const uint8_t mNodeFlags;
const int mStartPos; const int mShortcutListSizePos;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_TERMINAL_ATTRIBUTES_H #endif // LATINIME_TERMINAL_ATTRIBUTES_H