Merge "Move shortcut reading methods."

This commit is contained in:
Keisuke Kuroynagi 2013-06-26 05:44:56 +00:00 committed by Android (Google) Code Review
commit c0d7a376e2
5 changed files with 77 additions and 75 deletions

View file

@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
// The numeric value of the shortcut probability that means 'whitelist'.
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,

View file

@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
public:
typedef uint8_t TerminalAttributeFlags;
typedef TerminalAttributeFlags BigramFlags;
typedef TerminalAttributeFlags ShortcutFlags;
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
int *const pos);
// Shortcuts reading methods
// Reads the shortcut list size field at *pos through
// ByteArrayUtils::readUint16andAdvancePosition() and returns the size of the shortcut list
// region that follows it, i.e. the stored size minus the size field itself.
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // The stored value is an offset that *includes* the uint16 size field itself.
    const int sizeIncludingSizeField = ByteArrayUtils::readUint16andAdvancePosition(
            binaryDictionaryInfo->getDictRoot(), pos);
    return sizeIncludingSizeField - SHORTCUT_LIST_SIZE_FIELD_SIZE;
}
// Moves *pos past a whole shortcut list: first the size field is consumed, then *pos is
// advanced by the size of the remaining shortcut list region.
static AK_FORCE_INLINE void skipShortcuts(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
    // Kept as two statements so that the size read (which itself updates *pos) is complete
    // before the remaining bytes are added.
    const int remainingBytes = getShortcutListSizeAndForwardPointer(binaryDictionaryInfo, pos);
    *pos += remainingBytes;
}
// Returns true when the probability stored in the shortcut flags equals
// WHITELIST_SHORTCUT_PROBABILITY, the value that marks a shortcut as a whitelist entry.
static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
    return WHITELIST_SHORTCUT_PROBABILITY == getProbabilityFromFlags(flags);
}
// Reads a shortcut target string starting at *pos into outWord by delegating to
// ByteArrayUtils::readStringAndAdvancePosition(), and returns that call's result.
// maxLength, outWord and pos are forwarded unchanged.
static AK_FORCE_INLINE int readShortcutTarget(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
        int *const outWord, int *const pos) {
    const int targetLength = ByteArrayUtils::readStringAndAdvancePosition(
            binaryDictionaryInfo->getDictRoot(), maxLength, outWord, pos);
    return targetLength;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);
@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
static const int WHITELIST_SHORTCUT_PROBABILITY;
static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;

View file

@ -52,14 +52,10 @@ class BinaryFormat {
// Mask for attribute probability, stored on 4 bits inside the flags byte.
static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
// The numeric value of the shortcut probability that means 'whitelist'.
static const int WHITELIST_SHORTCUT_PROBABILITY = 15;
// Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
static const int SHORTCUT_LIST_SIZE_SIZE = 2;
static bool hasBlacklistedOrNotAWordFlag(const int flags);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
@ -73,9 +69,6 @@ class BinaryFormat {
const int pos);
static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
static bool hasChildrenInFlags(const uint8_t flags);
static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
int *pos);
static int getAttributeProbabilityFromFlags(const int flags);
static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
const int length, const bool forceLowerCaseSearch);
static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}
// Decodes an attribute address stored at *pos in dict.
// The address type bits of flags select a 1-, 2- or 3-byte offset, stored most significant
// byte first. *pos is moved past the bytes that were read. The returned address is relative
// to the position the offset was read from: origin - offset when
// FLAG_ATTRIBUTE_OFFSET_NEGATIVE is set in flags, origin + offset otherwise.
// If the address type matches none of the three cases, no bytes are consumed and the
// original position is returned.
AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
        const uint8_t flags, int *pos) {
    const int origin = *pos;

    // Number of bytes the offset occupies; stays 0 for an unrecognized address type.
    int offsetByteCount = 0;
    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
            offsetByteCount = 1;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
            offsetByteCount = 2;
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
            offsetByteCount = 3;
            break;
    }

    // Accumulate the bytes most significant first.
    int offset = 0;
    for (int i = 0; i < offsetByteCount; ++i) {
        offset = (offset << 8) + dict[origin + i];
    }
    *pos = origin + offsetByteCount;

    const bool isNegative = 0 != (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags);
    return isNegative ? origin - offset : origin + offset;
}
// Extracts the attribute probability from flags by keeping only the bits selected by
// MASK_ATTRIBUTE_PROBABILITY.
inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
    const int probability = MASK_ATTRIBUTE_PROBABILITY & flags;
    return probability;
}
// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,

View file

@ -29,15 +29,15 @@ class ShortcutUtils {
int outputWordIndex, const int finalScore, int *const outputCodePoints,
int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
int shortcutTarget[MAX_WORD_LENGTH];
while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
int shortcutTarget[MAX_WORD_LENGTH];
int shortcutProbability;
const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
bool isWhilelist;
int shortcutTargetStringLength;
iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetStringLength, &isWhilelist);
int shortcutScore;
int kind;
if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
&& sameAsTyped) {
if (isWhilelist && sameAsTyped) {
shortcutScore = S_INT_MAX;
kind = Dictionary::KIND_WHITELIST;
} else {

View file

@ -20,6 +20,7 @@
#include <stdint.h>
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_format.h"
namespace latinime {
@ -33,60 +34,66 @@ class TerminalAttributes {
public:
class ShortcutIterator {
public:
// Creates an iterator that reads from pos. Whether a first shortcut target exists is taken
// from the FLAG_HAS_SHORTCUT_TARGETS bit of flags.
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
        const uint8_t flags)
        : mBinaryDicitionaryInfo(binaryDictionaryInfo),
          mPos(pos),
          mHasNextShortcutTarget((flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS) != 0) {}
// Creates an iterator that reads from shortcutPos. hasShortcutList becomes the initial
// answer of hasNextShortcutTarget().
ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const int shortcutPos, const bool hasShortcutList)
        : mBinaryDictionaryInfo(binaryDictionaryInfo),
          mPos(shortcutPos),
          mHasNextShortcutTarget(hasShortcutList) {
}
// Returns the stored flag that tells whether another shortcut target can be read.
inline bool hasNextShortcutTarget() const { return mHasNextShortcutTarget; }
// Gets the shortcut target itself as an int string. For parameters and return value
// see BinaryFormat::getWordAtAddress.
inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
unsigned int i;
for (i = 0; i < MAX_WORD_LENGTH; ++i) {
const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
mBinaryDicitionaryInfo->getDictRoot(), &mPos);
if (NOT_A_CODE_POINT == codePoint) break;
outWord[i] = codePoint;
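// Reads the next shortcut target as an int string into outTarget, stores its length in
// outTargetLength, and stores whether it is a whitelist entry in outIsWhitelist.
// NOTE(review): the target is only read when outTargetLength is non-null, so passing null
// leaves mPos on the unread target string -- confirm that callers never pass null.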
AK_FORCE_INLINE void nextShortcutTarget(
const int maxDepth, int *const outTarget, int *const outTargetLength,
bool *const outIsWhitelist) {
const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
mBinaryDictionaryInfo, &mPos);
mHasNextShortcutTarget =
BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
if (outIsWhitelist) {
*outIsWhitelist =
BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
}
if (outTargetLength) {
*outTargetLength =
BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
}
*outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
return i;
}
private:
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int mPos;
bool mHasNextShortcutTarget;
};
// Stores the dictionary info, the flags and the start position for later use by the
// accessors of this class.
TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
        const uint8_t flags, const int pos)
        : mBinaryDicitionaryInfo(binaryDicitonaryInfo),
          mFlags(flags),
          mStartPos(pos) {}
// Stores the dictionary info, the node flags and the position of the shortcut list size
// field (shortcutPos) for later use by the accessors of this class.
TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const uint8_t nodeFlags, const int shortcutPos)
        : mBinaryDictionaryInfo(binaryDictionaryInfo),
          mNodeFlags(nodeFlags),
          mShortcutListSizePos(shortcutPos) {
}
inline ShortcutIterator getShortcutIterator() const {
// The size of the shortcuts is stored here so that the whole shortcut chunk can be
// skipped quickly, so we ignore it.
return ShortcutIterator(
mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
int shortcutPos = mShortcutListSizePos;
BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
mBinaryDictionaryInfo, &shortcutPos);
const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
}
bool isBlacklistedOrNotAWord() const {
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
const uint8_t mFlags;
const int mStartPos;
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const uint8_t mNodeFlags;
const int mShortcutListSizePos;
};
} // namespace latinime
#endif // LATINIME_TERMINAL_ATTRIBUTES_H