am 80c9b829: Merge "Give PatriciaTrieReadingUtils methods for reading nodes."

* commit '80c9b829bd390fd7e9b88dde68b509292cae9b07':
  Give PatriciaTrieReadingUtils methods for reading nodes.
main
Keisuke Kuroynagi 2013-08-01 01:35:29 -07:00 committed by Android Git Automerger
commit 15072b4e5c
9 changed files with 306 additions and 113 deletions

View File

@ -72,7 +72,8 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/session/dic_traverse_session.cpp \ suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \ $(addprefix suggest/policyimpl/dictionary/, \
dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_policy.cpp \
patricia_trie_policy.cpp) \ patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \ $(addprefix suggest/policyimpl/typing/, \
scoring_params.cpp \ scoring_params.cpp \

View File

@ -44,15 +44,15 @@ const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
const int origin = *pos; const int origin = *pos;
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
offset = ByteArrayUtils::readUint8andAdvancePosition( offset = ByteArrayUtils::readUint8AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos); binaryDictionaryInfo->getDictRoot(), pos);
break; break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
offset = ByteArrayUtils::readUint16andAdvancePosition( offset = ByteArrayUtils::readUint16AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos); binaryDictionaryInfo->getDictRoot(), pos);
break; break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
offset = ByteArrayUtils::readUint24andAdvancePosition( offset = ByteArrayUtils::readUint24AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos); binaryDictionaryInfo->getDictRoot(), pos);
break; break;
} }

View File

@ -33,7 +33,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
return ByteArrayUtils::readUint8andAdvancePosition( return ByteArrayUtils::readUint8AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos); binaryDictionaryInfo->getDictRoot(), pos);
} }
@ -66,7 +66,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
// readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself.
return ByteArrayUtils::readUint16andAdvancePosition( return ByteArrayUtils::readUint16AndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE; binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE;
} }

View File

@ -50,39 +50,39 @@ class ByteArrayUtils {
return buffer[pos]; return buffer[pos];
} }
static AK_FORCE_INLINE uint32_t readUint32andAdvancePosition( static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {
const uint32_t value = readUint32(buffer, *pos); const uint32_t value = readUint32(buffer, *pos);
*pos += 4; *pos += 4;
return value; return value;
} }
static AK_FORCE_INLINE int readSint24andAdvancePosition( static AK_FORCE_INLINE int readSint24AndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {
const uint8_t value = readUint8(buffer, *pos); const uint8_t value = readUint8(buffer, *pos);
if (value < 0x80) { if (value < 0x80) {
return readUint24andAdvancePosition(buffer, pos); return readUint24AndAdvancePosition(buffer, pos);
} else { } else {
(*pos)++; (*pos)++;
return -(((value & 0x7F) << 16) ^ readUint16andAdvancePosition(buffer, pos)); return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos));
} }
} }
static AK_FORCE_INLINE uint32_t readUint24andAdvancePosition( static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {
const uint32_t value = readUint24(buffer, *pos); const uint32_t value = readUint24(buffer, *pos);
*pos += 3; *pos += 3;
return value; return value;
} }
static AK_FORCE_INLINE uint16_t readUint16andAdvancePosition( static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {
const uint16_t value = readUint16(buffer, *pos); const uint16_t value = readUint16(buffer, *pos);
*pos += 2; *pos += 2;
return value; return value;
} }
static AK_FORCE_INLINE uint8_t readUint8andAdvancePosition( static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition(
const uint8_t *const buffer, int *const pos) { const uint8_t *const buffer, int *const pos) {
return buffer[(*pos)++]; return buffer[(*pos)++];
} }
@ -113,7 +113,7 @@ class ByteArrayUtils {
*pos += 1; *pos += 1;
return NOT_A_CODE_POINT; return NOT_A_CODE_POINT;
} else { } else {
return readUint24andAdvancePosition(buffer, pos); return readUint24AndAdvancePosition(buffer, pos);
} }
} else { } else {
*pos += 1; *pos += 1;

View File

@ -56,7 +56,6 @@ class BinaryFormat {
// Mask and flags for attribute address type selection. // Mask and flags for attribute address type selection.
static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
static bool hasBlacklistedOrNotAWordFlag(const int flags);
static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
@ -74,10 +73,6 @@ class BinaryFormat {
static int getCodePointsAndProbabilityAndReturnCodePointCount( static int getCodePointsAndProbabilityAndReturnCodePointCount(
const uint8_t *const root, const int nodePos, const int maxCodePointCount, const uint8_t *const root, const int nodePos, const int maxCodePointCount,
int *const outCodePoints, int *const outUnigramProbability); int *const outCodePoints, int *const outUnigramProbability);
static int getBigramListPositionForWordPosition(const uint8_t *const root,
const int nodePosition);
static int getShortcutListPositionForWordPosition(const uint8_t *const root,
const int nodePosition);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat);
@ -99,10 +94,6 @@ class BinaryFormat {
static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos);
}; };
inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) {
return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0;
}
AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict,
int *pos) { int *pos) {
const int msb = dict[(*pos)++]; const int msb = dict[(*pos)++];
@ -475,38 +466,5 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC
return 0; return 0;
} }
AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition(
const uint8_t *const root, const int nodePosition) {
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
int position = nodePosition;
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
if (!(flags & FLAG_HAS_BIGRAMS)) return NOT_A_DICT_POS;
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
position = skipOtherCharacters(root, position);
} else {
getCodePointAndForwardPointer(root, &position);
}
position = skipProbability(flags, position);
position = skipChildrenPosition(flags, position);
position = skipShortcuts(root, flags, position);
return position;
}
AK_FORCE_INLINE int BinaryFormat::getShortcutListPositionForWordPosition(
const uint8_t *const root, const int nodePosition) {
if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS;
int position = nodePosition;
const uint8_t flags = getFlagsAndForwardPointer(root, &position);
if (!(flags & FLAG_HAS_SHORTCUT_TARGETS)) return NOT_A_DICT_POS;
if (flags & FLAG_HAS_MULTIPLE_CHARS) {
position = skipOtherCharacters(root, position);
} else {
getCodePointAndForwardPointer(root, &position);
}
position = skipProbability(flags, position);
position = skipChildrenPosition(flags, position);
return position;
}
} // namespace latinime } // namespace latinime
#endif // LATINIME_BINARY_FORMAT_H #endif // LATINIME_BINARY_FORMAT_H

View File

@ -21,7 +21,9 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/policyimpl/dictionary/binary_format.h" #include "suggest/policyimpl/dictionary/binary_format.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime { namespace latinime {
@ -34,7 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
return; return;
} }
int nextPos = dicNode->getChildrenPos(); int nextPos = dicNode->getChildrenPos();
const int childCount = BinaryFormat::getGroupCountAndForwardPointer( const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos); binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) { for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
@ -60,82 +62,108 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord(
int PatriciaTriePolicy::getUnigramProbability( int PatriciaTriePolicy::getUnigramProbability(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
const uint8_t *const root = binaryDictionaryInfo->getDictRoot(); if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos; int pos = nodePos;
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); const PatriciaTrieReadingUtils::NodeFlags flags =
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
return NOT_A_PROBABILITY;
}
if (PatriciaTrieReadingUtils::isNotAWord(flags)
|| PatriciaTrieReadingUtils::isBlacklisted(flags)) {
// If this is not a word, or if it's a blacklisted entry, it should behave as // If this is not a word, or if it's a blacklisted entry, it should behave as
// having no probability outside of the suggestion process (where it should be used // having no probability outside of the suggestion process (where it should be used
// for shortcuts). // for shortcuts).
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
if (hasMultipleChars) { return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
pos = BinaryFormat::skipOtherCharacters(root, pos);
} else {
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
}
return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
} }
int PatriciaTriePolicy::getShortcutPositionOfNode( int PatriciaTriePolicy::getShortcutPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const { const int nodePos) const {
return BinaryFormat::getShortcutListPositionForWordPosition( if (nodePos == NOT_A_VALID_WORD_POS) {
binaryDictionaryInfo->getDictRoot(), nodePos); return NOT_A_DICT_POS;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
return NOT_A_DICT_POS;
}
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
}
return pos;
} }
int PatriciaTriePolicy::getBigramsPositionOfNode( int PatriciaTriePolicy::getBigramsPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const { const int nodePos) const {
return BinaryFormat::getBigramListPositionForWordPosition( if (nodePos == NOT_A_VALID_WORD_POS) {
binaryDictionaryInfo->getDictRoot(), nodePos); return NOT_A_DICT_POS;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
return NOT_A_DICT_POS;
}
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
}
return pos;
} }
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos, int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const { const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
const int nextPos = pos; const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer( int pos = nodePos;
binaryDictionaryInfo->getDictRoot(), &pos); const PatriciaTrieReadingUtils::NodeFlags flags =
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags));
const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags);
const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags);
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &pos);
ASSERT(NOT_A_CODE_POINT != codePoint);
// TODO: optimize this
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
uint16_t mergedNodeCodePointCount = 0; const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint; dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
do { PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
const int nextCodePoint = hasMultipleChars : NOT_A_PROBABILITY;
? BinaryFormat::getCodePointAndForwardPointer( const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT; PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint); dictRoot, flags, &pos) : NOT_A_DICT_POS;
if (!isLastChar) { if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint; BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
} }
codePoint = nextCodePoint; if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
} while (NOT_A_CODE_POINT != codePoint); BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
binaryDictionaryInfo, &pos);
const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer(
binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY;
pos = BinaryFormat::skipProbability(flags, pos);
int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition(
binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS;
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
binaryDictionaryInfo->getDictRoot(), flags, pos);
if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
return siblingPos;
} }
childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal, if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
return siblingPos; PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
PatriciaTrieReadingUtils::isNotAWord(flags),
mergedNodeCodePointCount, mergedNodeCodePoints);
}
return pos;
} }
} // namespace latinime } // namespace latinime

View File

@ -61,7 +61,7 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy {
PatriciaTriePolicy() {} PatriciaTriePolicy() {}
~PatriciaTriePolicy() {} ~PatriciaTriePolicy() {}
int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
}; };

View File

@ -0,0 +1,67 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "defines.h"
#include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime {
// Short alias that keeps the static member definitions in this file readable.
typedef PatriciaTrieReadingUtils PtReadingUtils;
// NodeFlags is a single byte. Its two highest bits (mask 0xC0) select how the children
// position is stored: readChildrenPositionAndAdvancePosition() reads one, two or three bytes
// for the ONEBYTE / TWOBYTES / THREEBYTES values and reads nothing for any other value.
const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
// The remaining six bits are independent single-bit flags.
// Flag for single/multiple char group
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
// Flag for terminal groups
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
// Flag for shortcut targets presence
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08;
// Flag for bigram presence
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
// Flag for non-words (typically, shortcut only entries)
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
// Flag for blacklist
const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
/**
 * Reads the children position field located at *pos and moves *pos past it.
 *
 * The field holds an offset of one, two or three bytes (as selected by the address-type bits
 * of flags) that is relative to the position of the field itself.
 *
 * @param buffer the dictionary bytes to read from.
 * @param flags the flags of the node being read.
 * @param pos in: position of the children position field; out: position just after it.
 *         Left untouched when the flags carry no address.
 * @return the children position, or NOT_A_DICT_POS when the flags carry no address.
 */
/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
        const uint8_t *const buffer, const NodeFlags flags, int *const pos) {
    // Remember where the field starts before any read moves *pos forward.
    const int fieldStartPos = *pos;
    const int addressType = MASK_GROUP_ADDRESS_TYPE & flags;
    int relativeOffset = 0;
    if (addressType == FLAG_GROUP_ADDRESS_TYPE_ONEBYTE) {
        relativeOffset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
    } else if (addressType == FLAG_GROUP_ADDRESS_TYPE_TWOBYTES) {
        relativeOffset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
    } else if (addressType == FLAG_GROUP_ADDRESS_TYPE_THREEBYTES) {
        relativeOffset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
    } else {
        // Reaching this branch means the children of a node without children were requested.
        return NOT_A_DICT_POS;
    }
    return fieldStartPos + relativeOffset;
}
} // namespace latinime

View File

@ -0,0 +1,139 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
#define LATINIME_PATRICIA_TRIE_READING_UTILS_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime {
/**
 * Static helpers for decoding the fields of a patricia trie node from a byte buffer.
 *
 * Every reader takes the buffer and a pointer to the current position; the position is moved
 * past whatever the reader consumed. The class cannot be instantiated.
 */
class PatriciaTrieReadingUtils {
 public:
    // All node flags fit in one byte.
    typedef uint8_t NodeFlags;

    // Reads a group count stored on one or two bytes. A first byte below 0x80 is the count
    // itself; otherwise its low seven bits are the high part and the next byte the low part.
    static AK_FORCE_INLINE int getGroupCountAndAdvancePosition(
            const uint8_t *const buffer, int *const pos) {
        const uint8_t leadByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
        if ((leadByte & 0x80) == 0) {
            return leadByte;
        }
        const uint8_t trailByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
        return ((leadByte & 0x7F) << 8) ^ trailByte;
    }

    // Reads the one-byte flags of a node.
    static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer,
            int *const pos) {
        const NodeFlags nodeFlags = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
        return nodeFlags;
    }

    // Reads a single code point.
    static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer,
            int *const pos) {
        return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
    }

    // Copies the characters of a node into outBuffer and returns how many were read.
    // Nothing is read for a single-character node when maxLength is not positive.
    static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer,
            const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) {
        if (hasMultipleChars(flags)) {
            const int readCount = ByteArrayUtils::readStringAndAdvancePosition(
                    buffer, maxLength, outBuffer, pos);
            return readCount;
        }
        if (maxLength <= 0) {
            return 0;
        }
        outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos);
        return 1;
    }

    // Moves past the characters of a node and returns how many were skipped.
    // Nothing is skipped for a single-character node when maxLength is not positive.
    static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
            const int maxLength, int *const pos) {
        if (hasMultipleChars(flags)) {
            return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos);
        }
        if (maxLength <= 0) {
            return 0;
        }
        getCodePointAndAdvancePosition(buffer, pos);
        return 1;
    }

    // Reads the probability, which is stored on one byte.
    static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer,
            int *const pos) {
        const int probability = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
        return probability;
    }

    static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
            const NodeFlags flags, int *const pos);

    /**
     * Node flag predicates
     */
    static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_IS_BLACKLISTED);
    }

    static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_IS_NOT_A_WORD);
    }

    static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_IS_TERMINAL);
    }

    static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_HAS_SHORTCUT_TARGETS);
    }

    static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_HAS_BIGRAMS);
    }

    static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
        return isFlagSet(flags, FLAG_HAS_MULTIPLE_CHARS);
    }

    // True when the address-type bits of the flags hold anything other than "no address".
    static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
        return (flags & MASK_GROUP_ADDRESS_TYPE) != FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);

    static const NodeFlags MASK_GROUP_ADDRESS_TYPE;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES;
    static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES;
    static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
    static const NodeFlags FLAG_IS_TERMINAL;
    static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
    static const NodeFlags FLAG_HAS_BIGRAMS;
    static const NodeFlags FLAG_IS_NOT_A_WORD;
    static const NodeFlags FLAG_IS_BLACKLISTED;

    // Shared test behind the single-bit predicates: true when any bit of flag is set in flags.
    static AK_FORCE_INLINE bool isFlagSet(const NodeFlags flags, const NodeFlags flag) {
        return (flags & flag) != 0;
    }
};
} // namespace latinime
#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */