am 80c9b829
: Merge "Give PatriciaTrieReadingUtils methods for reading nodes."
* commit '80c9b829bd390fd7e9b88dde68b509292cae9b07': Give PatriciaTrieReadingUtils methods for reading nodes.
This commit is contained in:
commit
15072b4e5c
9 changed files with 306 additions and 113 deletions
|
@ -72,7 +72,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
suggest/core/session/dic_traverse_session.cpp \
|
||||
$(addprefix suggest/policyimpl/dictionary/, \
|
||||
dynamic_patricia_trie_policy.cpp \
|
||||
patricia_trie_policy.cpp) \
|
||||
patricia_trie_policy.cpp \
|
||||
patricia_trie_reading_utils.cpp) \
|
||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||
$(addprefix suggest/policyimpl/typing/, \
|
||||
scoring_params.cpp \
|
||||
|
|
|
@ -44,15 +44,15 @@ const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
|
|||
const int origin = *pos;
|
||||
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
offset = ByteArrayUtils::readUint8andAdvancePosition(
|
||||
offset = ByteArrayUtils::readUint8AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
break;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
offset = ByteArrayUtils::readUint16andAdvancePosition(
|
||||
offset = ByteArrayUtils::readUint16AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
break;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
offset = ByteArrayUtils::readUint24andAdvancePosition(
|
||||
offset = ByteArrayUtils::readUint24AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
|
||||
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||
return ByteArrayUtils::readUint8andAdvancePosition(
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||
// readUint16AndAdvancePosition() returns an offset *including* the uint16 field itself.
|
||||
return ByteArrayUtils::readUint16andAdvancePosition(
|
||||
return ByteArrayUtils::readUint16AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
|
||||
}
|
||||
|
||||
|
|
|
@ -50,39 +50,39 @@ class ByteArrayUtils {
|
|||
return buffer[pos];
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE uint32_t readUint32andAdvancePosition(
|
||||
static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
const uint32_t value = readUint32(buffer, *pos);
|
||||
*pos += 4;
|
||||
return value;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE int readSint24andAdvancePosition(
|
||||
static AK_FORCE_INLINE int readSint24AndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
const uint8_t value = readUint8(buffer, *pos);
|
||||
if (value < 0x80) {
|
||||
return readUint24andAdvancePosition(buffer, pos);
|
||||
return readUint24AndAdvancePosition(buffer, pos);
|
||||
} else {
|
||||
(*pos)++;
|
||||
return -(((value & 0x7F) << 16) ^ readUint16andAdvancePosition(buffer, pos));
|
||||
return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
|
||||
}
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE uint32_t readUint24andAdvancePosition(
|
||||
static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
const uint32_t value = readUint24(buffer, *pos);
|
||||
*pos += 3;
|
||||
return value;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE uint16_t readUint16andAdvancePosition(
|
||||
static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
const uint16_t value = readUint16(buffer, *pos);
|
||||
*pos += 2;
|
||||
return value;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE uint8_t readUint8andAdvancePosition(
|
||||
static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
return buffer[(*pos)++];
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ class ByteArrayUtils {
|
|||
*pos += 1;
|
||||
return NOT_A_CODE_POINT;
|
||||
} else {
|
||||
return readUint24andAdvancePosition(buffer, pos);
|
||||
return readUint24AndAdvancePosition(buffer, pos);
|
||||
}
|
||||
} else {
|
||||
*pos += 1;
|
||||
|
|
|
@ -56,7 +56,6 @@ class BinaryFormat {
|
|||
// Mask and flags for attribute address type selection.
|
||||
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
|
||||
|
||||
static bool hasBlacklistedOrNotAWordFlag(const int flags);
|
||||
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
|
||||
|
@ -74,10 +73,6 @@ class BinaryFormat {
|
|||
static int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const uint8_t *const root, const int nodePos, const int maxCodePointCount,
|
||||
int *const outCodePoints, int *const outUnigramProbability);
|
||||
static int getBigramListPositionForWordPosition(const uint8_t *const root,
|
||||
const int nodePosition);
|
||||
static int getShortcutListPositionForWordPosition(const uint8_t *const root,
|
||||
const int nodePosition);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
|
||||
|
@ -99,10 +94,6 @@ class BinaryFormat {
|
|||
static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
|
||||
};
|
||||
|
||||
inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) {
|
||||
return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
|
||||
int *pos) {
|
||||
const int msb = dict[(*pos)++];
|
||||
|
@ -475,38 +466,5 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC
|
|||
return 0;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
|
||||
const uint8_t *const root, const int nodePosition) {
|
||||
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
|
||||
int position = nodePosition;
|
||||
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
||||
if (!(flags & FLAG_HAS_BIGRAMS)) return NOT_A_DICT_POS;
|
||||
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
||||
position = skipOtherCharacters(root, position);
|
||||
} else {
|
||||
getCodePointAndForwardPointer(root, &position);
|
||||
}
|
||||
position = skipProbability(flags, position);
|
||||
position = skipChildrenPosition(flags, position);
|
||||
position = skipShortcuts(root, flags, position);
|
||||
return position;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int BinaryFormat::getShortcutListPositionForWordPosition(
|
||||
const uint8_t *const root, const int nodePosition) {
|
||||
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
|
||||
int position = nodePosition;
|
||||
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
|
||||
if (!(flags & FLAG_HAS_SHORTCUT_TARGETS)) return NOT_A_DICT_POS;
|
||||
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
|
||||
position = skipOtherCharacters(root, position);
|
||||
} else {
|
||||
getCodePointAndForwardPointer(root, &position);
|
||||
}
|
||||
position = skipProbability(flags, position);
|
||||
position = skipChildrenPosition(flags, position);
|
||||
return position;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BINARY_FORMAT_H
|
||||
|
|
|
@ -21,7 +21,9 @@
|
|||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/binary_format.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -34,7 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
|||
return;
|
||||
}
|
||||
int nextPos = dicNode->getChildrenPos();
|
||||
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
|
||||
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
||||
|
@ -60,82 +62,108 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(
|
|||
|
||||
int PatriciaTriePolicy::getUnigramProbability(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
|
||||
const uint8_t *const root = binaryDictionaryInfo->getDictRoot();
|
||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
||||
int pos = nodePos;
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::isNotAWord(flags)
|
||||
|| PatriciaTrieReadingUtils::isBlacklisted(flags)) {
|
||||
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
||||
// having no probability outside of the suggestion process (where it should be used
|
||||
// for shortcuts).
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
if (hasMultipleChars) {
|
||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||
} else {
|
||||
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||
}
|
||||
return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
|
||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getShortcutPositionOfNode(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const int nodePos) const {
|
||||
return BinaryFormat::getShortcutListPositionForWordPosition(
|
||||
binaryDictionaryInfo->getDictRoot(), nodePos);
|
||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
||||
int pos = nodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getBigramsPositionOfNode(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const int nodePos) const {
|
||||
return BinaryFormat::getBigramListPositionForWordPosition(
|
||||
binaryDictionaryInfo->getDictRoot(), nodePos);
|
||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
||||
int pos = nodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
||||
const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
|
||||
const int nextPos = pos;
|
||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(
|
||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
|
||||
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
|
||||
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
|
||||
|
||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
||||
ASSERT(NOT_A_CODE_POINT != codePoint);
|
||||
// TODO: optimize this
|
||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
||||
int pos = nodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
uint16_t mergedNodeCodePointCount = 0;
|
||||
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
|
||||
|
||||
do {
|
||||
const int nextCodePoint = hasMultipleChars
|
||||
? BinaryFormat::getCodePointAndForwardPointer(
|
||||
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
|
||||
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
||||
if (!isLastChar) {
|
||||
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
|
||||
}
|
||||
codePoint = nextCodePoint;
|
||||
} while (NOT_A_CODE_POINT != codePoint);
|
||||
|
||||
const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
|
||||
binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
|
||||
pos = BinaryFormat::skipProbability(flags, pos);
|
||||
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
|
||||
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
|
||||
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
||||
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
||||
|
||||
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||
return siblingPos;
|
||||
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||
dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
|
||||
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
|
||||
: NOT_A_PROBABILITY;
|
||||
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||
dictRoot, flags, &pos) : NOT_A_DICT_POS;
|
||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
|
||||
}
|
||||
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal,
|
||||
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
return siblingPos;
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
|
||||
binaryDictionaryInfo, &pos);
|
||||
}
|
||||
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
||||
PatriciaTrieReadingUtils::isTerminal(flags),
|
||||
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
|
||||
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
|
||||
PatriciaTrieReadingUtils::isNotAWord(flags),
|
||||
mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -61,7 +61,7 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
|
|||
PatriciaTriePolicy() {}
|
||||
~PatriciaTriePolicy() {}
|
||||
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos,
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Short alias used for the static member definitions below.
typedef PatriciaTrieReadingUtils PtReadingUtils;
|
||||
|
||||
// Mask and values for the two top bits of the node flags. They select how many bytes
// readChildrenPositionAndAdvancePosition() reads for the children address:
// none, one, two or three.
const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
|
||||
|
||||
// The remaining flags each occupy a single bit of the flag byte (0x20 down to 0x01).
// Flag for single/multiple char group
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
|
||||
// Flag for terminal groups
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
|
||||
// Flag for shortcut targets presence
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
|
||||
// Flag for bigram presence
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
|
||||
// Flag for non-words (typically, shortcut only entries)
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
|
||||
// Flag for blacklist
|
||||
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
|
||||
|
||||
// Reads the children-address field of a node and converts it to a position.
// The width of the field (1, 2 or 3 bytes) is selected by the bits of `flags` under
// MASK_GROUP_ADDRESS_TYPE. On those three paths *pos is advanced past the field, and the
// return value is the position the field started at plus the offset that was read.
// For any other address type nothing is read, *pos is left untouched and NOT_A_DICT_POS
// is returned.
/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||
const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
|
||||
// The stored offset is added to the position of the field itself, so capture that
// position before any read moves *pos.
const int base = *pos;
|
||||
int offset = 0;
|
||||
switch (MASK_GROUP_ADDRESS_TYPE & flags) {
|
||||
case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||
offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
|
||||
break;
|
||||
case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||
offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
|
||||
break;
|
||||
case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||
offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
|
||||
break;
|
||||
default:
|
||||
// If we come here, it means we asked for the children of a word with
|
||||
// no children.
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return base + offset;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,139 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
|
||||
#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Collection of static helpers that decode node fields from a byte buffer (group count,
// flags, code points, probability, children position) and test bits of a node's flag byte.
// Every reader takes the current position through `pos` and advances it past what it read.
class PatriciaTrieReadingUtils {
|
||||
public:
|
||||
// Type of the one-byte flag field returned by getFlagsAndAdvancePosition().
typedef uint8_t NodeFlags;
|
||||
|
||||
// Reads a group count stored in one or two bytes.
// A first byte below 0x80 is the count itself. Otherwise its low 7 bits become the high
// byte of the count and one more byte is read to supply the low byte.
static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
|
||||
const uint8_t *const buffer, int *const pos) {
|
||||
const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
|
||||
if (firstByte < 0x80) {
|
||||
return firstByte;
|
||||
} else {
|
||||
return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition(
|
||||
buffer, pos);
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the one-byte flag field at *pos and advances *pos by one.
static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer,
|
||||
int *const pos) {
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
|
||||
}
|
||||
|
||||
// Reads one code point by delegating to ByteArrayUtils::readCodePointAndAdvancePosition().
static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer,
|
||||
int *const pos) {
|
||||
return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
|
||||
}
|
||||
|
||||
// Returns the number of read characters.
// When `flags` has the multiple-chars bit set, the characters are read with
// ByteArrayUtils::readStringAndAdvancePosition(). Otherwise a single code point is
// stored in outBuffer[0], provided maxLength > 0.
// NOTE(review): with a single-char node and maxLength <= 0, nothing is read and *pos is
// not advanced past the character - confirm callers never pass a non-positive maxLength.
|
||||
static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer,
|
||||
const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
|
||||
int length = 0;
|
||||
if (hasMultipleChars(flags)) {
|
||||
length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer,
|
||||
pos);
|
||||
} else {
|
||||
if (maxLength > 0) {
|
||||
outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
|
||||
length = 1;
|
||||
}
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
// Returns the number of skipped characters.
// Same branching as getCharsAndAdvancePosition(), but the characters are discarded:
// multi-char nodes go through ByteArrayUtils::advancePositionToBehindString(), and
// single-char nodes read and drop one code point when maxLength > 0.
|
||||
static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
|
||||
const int maxLength, int *const pos) {
|
||||
if (hasMultipleChars(flags)) {
|
||||
return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
|
||||
} else {
|
||||
if (maxLength > 0) {
|
||||
getCodePointAndAdvancePosition(buffer, pos);
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the probability, stored as a single unsigned byte, and advances *pos by one.
static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer,
|
||||
int *const pos) {
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
|
||||
}
|
||||
|
||||
// Declared here only; the definition is not part of this class body.
static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
|
||||
const NodeFlags flags, int *const pos);
|
||||
|
||||
/**
|
||||
* Node Flags
|
||||
*/
|
||||
// Each predicate below tests one bit (or bit group) of the flag byte.
static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
|
||||
return (flags & FLAG_IS_BLACKLISTED) != 0;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
|
||||
return (flags & FLAG_IS_NOT_A_WORD) != 0;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
|
||||
return (flags & FLAG_IS_TERMINAL) != 0;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
|
||||
return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
|
||||
return (flags & FLAG_HAS_BIGRAMS) != 0;
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
|
||||
return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
|
||||
}
|
||||
|
||||
// True when the address-type bits are anything other than the "no address" value.
static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
|
||||
return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
|
||||
|
||||
// Address-type mask and its values; only declared here.
static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
|
||||
static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
|
||||
static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
|
||||
static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
|
||||
static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
|
||||
|
||||
// Flag bits tested by the predicates above; only declared here.
static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
|
||||
static const NodeFlags FLAG_IS_TERMINAL;
|
||||
static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
|
||||
static const NodeFlags FLAG_HAS_BIGRAMS;
|
||||
static const NodeFlags FLAG_IS_NOT_A_WORD;
|
||||
static const NodeFlags FLAG_IS_BLACKLISTED;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
|
Loading…
Reference in a new issue