From 647c00070712067fc5ae415f9106be5ca4e17464 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Thu, 1 Aug 2013 16:15:35 +0900 Subject: [PATCH] Give PatriciaTrieReadingUtils methods for reading nodes. The remaining methods in BinaryFormat will be removed. Bug: 6669677 Change-Id: I866f828d69a600c0ac03b68d71b1f6ee2ce4dd36 --- native/jni/Android.mk | 3 +- ...nary_terminal_attributes_reading_utils.cpp | 6 +- ...ionary_terminal_attributes_reading_utils.h | 4 +- .../core/dictionary/byte_array_utils.h | 16 +- .../policyimpl/dictionary/binary_format.h | 42 ------ .../dictionary/patricia_trie_policy.cpp | 140 +++++++++++------- .../dictionary/patricia_trie_policy.h | 2 +- .../patricia_trie_reading_utils.cpp | 67 +++++++++ .../dictionary/patricia_trie_reading_utils.h | 139 +++++++++++++++++ 9 files changed, 306 insertions(+), 113 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 771623c36..acd230ff2 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -72,7 +72,8 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ dynamic_patricia_trie_policy.cpp \ - patricia_trie_policy.cpp) \ + patricia_trie_policy.cpp \ + patricia_trie_reading_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp index 52b668936..20b77b3b2 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +++ 
b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp @@ -44,15 +44,15 @@ const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; const int origin = *pos; switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - offset = ByteArrayUtils::readUint8andAdvancePosition( + offset = ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - offset = ByteArrayUtils::readUint16andAdvancePosition( + offset = ByteArrayUtils::readUint16AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - offset = ByteArrayUtils::readUint24andAdvancePosition( + offset = ByteArrayUtils::readUint24AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); break; } diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h index 15637d8a9..375fc7dff 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h @@ -33,7 +33,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils { static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { - return ByteArrayUtils::readUint8andAdvancePosition( + return ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); } @@ -66,7 +66,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils { static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { // readUint16andAdvancePosition() returns an offset *including* the uint16 field itself. 
- return ByteArrayUtils::readUint16andAdvancePosition( + return ByteArrayUtils::readUint16AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos) - SHORTCUT_LIST_SIZE_FIELD_SIZE; } diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/core/dictionary/byte_array_utils.h index daa822ffa..75ccfc766 100644 --- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h +++ b/native/jni/src/suggest/core/dictionary/byte_array_utils.h @@ -50,39 +50,39 @@ class ByteArrayUtils { return buffer[pos]; } - static AK_FORCE_INLINE uint32_t readUint32andAdvancePosition( + static AK_FORCE_INLINE uint32_t readUint32AndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint32_t value = readUint32(buffer, *pos); *pos += 4; return value; } - static AK_FORCE_INLINE int readSint24andAdvancePosition( + static AK_FORCE_INLINE int readSint24AndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint8_t value = readUint8(buffer, *pos); if (value < 0x80) { - return readUint24andAdvancePosition(buffer, pos); + return readUint24AndAdvancePosition(buffer, pos); } else { (*pos)++; - return -(((value & 0x7F) << 16) ^ readUint16andAdvancePosition(buffer, pos)); + return -(((value & 0x7F) << 16) ^ readUint16AndAdvancePosition(buffer, pos)); } } - static AK_FORCE_INLINE uint32_t readUint24andAdvancePosition( + static AK_FORCE_INLINE uint32_t readUint24AndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint32_t value = readUint24(buffer, *pos); *pos += 3; return value; } - static AK_FORCE_INLINE uint16_t readUint16andAdvancePosition( + static AK_FORCE_INLINE uint16_t readUint16AndAdvancePosition( const uint8_t *const buffer, int *const pos) { const uint16_t value = readUint16(buffer, *pos); *pos += 2; return value; } - static AK_FORCE_INLINE uint8_t readUint8andAdvancePosition( + static AK_FORCE_INLINE uint8_t readUint8AndAdvancePosition( const uint8_t *const buffer, int *const pos) { return 
buffer[(*pos)++]; } @@ -113,7 +113,7 @@ class ByteArrayUtils { *pos += 1; return NOT_A_CODE_POINT; } else { - return readUint24andAdvancePosition(buffer, pos); + return readUint24AndAdvancePosition(buffer, pos); } } else { *pos += 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h b/native/jni/src/suggest/policyimpl/dictionary/binary_format.h index 9e22b50cd..23f4c7fec 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/binary_format.h +++ b/native/jni/src/suggest/policyimpl/dictionary/binary_format.h @@ -56,7 +56,6 @@ class BinaryFormat { // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; - static bool hasBlacklistedOrNotAWordFlag(const int flags); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); @@ -74,10 +73,6 @@ class BinaryFormat { static int getCodePointsAndProbabilityAndReturnCodePointCount( const uint8_t *const root, const int nodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability); - static int getBigramListPositionForWordPosition(const uint8_t *const root, - const int nodePosition); - static int getShortcutListPositionForWordPosition(const uint8_t *const root, - const int nodePosition); private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); @@ -99,10 +94,6 @@ class BinaryFormat { static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); }; -inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) { - return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0; -} - AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) { const int msb = dict[(*pos)++]; @@ -475,38 +466,5 @@ AK_FORCE_INLINE int 
BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointC return 0; } -AK_FORCE_INLINE int BinaryFormat::getBigramListPositionForWordPosition( - const uint8_t *const root, const int nodePosition) { - if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS; - int position = nodePosition; - const uint8_t flags = getFlagsAndForwardPointer(root, &position); - if (!(flags & FLAG_HAS_BIGRAMS)) return NOT_A_DICT_POS; - if (flags & FLAG_HAS_MULTIPLE_CHARS) { - position = skipOtherCharacters(root, position); - } else { - getCodePointAndForwardPointer(root, &position); - } - position = skipProbability(flags, position); - position = skipChildrenPosition(flags, position); - position = skipShortcuts(root, flags, position); - return position; -} - -AK_FORCE_INLINE int BinaryFormat::getShortcutListPositionForWordPosition( - const uint8_t *const root, const int nodePosition) { - if (NOT_A_VALID_WORD_POS == nodePosition) return NOT_A_DICT_POS; - int position = nodePosition; - const uint8_t flags = getFlagsAndForwardPointer(root, &position); - if (!(flags & FLAG_HAS_SHORTCUT_TARGETS)) return NOT_A_DICT_POS; - if (flags & FLAG_HAS_MULTIPLE_CHARS) { - position = skipOtherCharacters(root, position); - } else { - getCodePointAndForwardPointer(root, &position); - } - position = skipProbability(flags, position); - position = skipChildrenPosition(flags, position); - return position; -} - } // namespace latinime #endif // LATINIME_BINARY_FORMAT_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp index 2a9a5ce7a..097f7c86a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -21,7 +21,9 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_info.h" +#include 
"suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/policyimpl/dictionary/binary_format.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { @@ -34,7 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, return; } int nextPos = dicNode->getChildrenPos(); - const int childCount = BinaryFormat::getGroupCountAndForwardPointer( + const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( binaryDictionaryInfo->getDictRoot(), &nextPos); for (int i = 0; i < childCount; i++) { nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, @@ -60,82 +62,108 @@ int PatriciaTriePolicy::getTerminalNodePositionOfWord( int PatriciaTriePolicy::getUnigramProbability( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - const uint8_t *const root = binaryDictionaryInfo->getDictRoot(); + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_PROBABILITY; + } + const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); int pos = nodePos; - const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); - if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + if (!PatriciaTrieReadingUtils::isTerminal(flags)) { + return NOT_A_PROBABILITY; + } + if (PatriciaTrieReadingUtils::isNotAWord(flags) + || PatriciaTrieReadingUtils::isBlacklisted(flags)) { // If this is not a word, or if it's a blacklisted entry, it should behave as // having no probability outside of the suggestion process (where it should be used // for shortcuts). 
return NOT_A_PROBABILITY; } - const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); - if (hasMultipleChars) { - pos = BinaryFormat::skipOtherCharacters(root, pos); - } else { - BinaryFormat::getCodePointAndForwardPointer(root, &pos); - } - return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos); + PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); } int PatriciaTriePolicy::getShortcutPositionOfNode( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - return BinaryFormat::getShortcutListPositionForWordPosition( - binaryDictionaryInfo->getDictRoot(), nodePos); + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_DICT_POS; + } + const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); + int pos = nodePos; + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { + return NOT_A_DICT_POS; + } + PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + } + if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + } + return pos; } int PatriciaTriePolicy::getBigramsPositionOfNode( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - return BinaryFormat::getBigramListPositionForWordPosition( - binaryDictionaryInfo->getDictRoot(), nodePos); + if (nodePos == NOT_A_VALID_WORD_POS) { + return NOT_A_DICT_POS; + } + const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); + int pos = nodePos; + const PatriciaTrieReadingUtils::NodeFlags flags = + 
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { + return NOT_A_DICT_POS; + } + PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + } + if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { + PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); + } + if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { + BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); + } + return pos; } -int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int pos, - const BinaryDictionaryInfo *const binaryDictionaryInfo, +int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, + const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const { - const int nextPos = pos; - const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &pos); - const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); - const bool isTerminal = (0 != (BinaryFormat::FLAG_IS_TERMINAL & flags)); - const bool hasChildren = BinaryFormat::hasChildrenInFlags(flags); - const bool isBlacklistedOrNotAWord = BinaryFormat::hasBlacklistedOrNotAWordFlag(flags); - - int codePoint = BinaryFormat::getCodePointAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &pos); - ASSERT(NOT_A_CODE_POINT != codePoint); - // TODO: optimize this + const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot(); + int pos = nodePos; + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); int mergedNodeCodePoints[MAX_WORD_LENGTH]; - uint16_t 
mergedNodeCodePointCount = 0; - mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint; - - do { - const int nextCodePoint = hasMultipleChars - ? BinaryFormat::getCodePointAndForwardPointer( - binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT; - const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint); - if (!isLastChar) { - mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint; - } - codePoint = nextCodePoint; - } while (NOT_A_CODE_POINT != codePoint); - - const int probability = isTerminal ? BinaryFormat::readProbabilityWithoutMovingPointer( - binaryDictionaryInfo->getDictRoot(), pos) : NOT_A_PROBABILITY; - pos = BinaryFormat::skipProbability(flags, pos); - int childrenPos = hasChildren ? BinaryFormat::readChildrenPosition( - binaryDictionaryInfo->getDictRoot(), flags, pos) : NOT_A_DICT_POS; - const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes( - binaryDictionaryInfo->getDictRoot(), flags, pos); - - if (childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { - return siblingPos; + const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); + const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? + PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos) + : NOT_A_PROBABILITY; + const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? 
+ PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictRoot, flags, &pos) : NOT_A_DICT_POS; + if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { + BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); } - childDicNodes->pushLeavingChild(dicNode, nextPos, childrenPos, probability, isTerminal, - hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); - return siblingPos; + if (PatriciaTrieReadingUtils::hasBigrams(flags)) { + BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( + binaryDictionaryInfo, &pos); + } + if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { + childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, + PatriciaTrieReadingUtils::isTerminal(flags), + PatriciaTrieReadingUtils::hasChildrenInFlags(flags), + PatriciaTrieReadingUtils::isBlacklisted(flags) || + PatriciaTrieReadingUtils::isNotAWord(flags), + mergedNodeCodePointCount, mergedNodeCodePoints); + } + return pos; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index 42827d93a..71f256eee 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -61,7 +61,7 @@ class PatriciaTriePolicy : public DictionaryStructurePolicy { PatriciaTriePolicy() {} ~PatriciaTriePolicy() {} - int createAndGetLeavingChildNode(const DicNode *const dicNode, int pos, + int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp new 
file mode 100644 index 000000000..89e981df8 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" + +#include "defines.h" +#include "suggest/core/dictionary/byte_array_utils.h" + +namespace latinime { + +typedef PatriciaTrieReadingUtils PtReadingUtils; + +const PtReadingUtils::NodeFlags PtReadingUtils::MASK_GROUP_ADDRESS_TYPE = 0xC0; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; + +// Flag for single/multiple char group +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; +// Flag for terminal groups +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; +// Flag for shortcut targets presence +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; +// Flag for bigram presence +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04; +// Flag for non-words (typically, shortcut only entries) +const 
PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; +// Flag for blacklist +const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; + +/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( + const uint8_t *const buffer, const NodeFlags flags, int *const pos) { + const int base = *pos; + int offset = 0; + switch (MASK_GROUP_ADDRESS_TYPE & flags) { + case FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + break; + case FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); + break; + case FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); + break; + default: + // If we come here, it means we asked for the children of a word with + // no children. + return NOT_A_DICT_POS; + } + return base + offset; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h new file mode 100644 index 000000000..002c3f19b --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_reading_utils.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H +#define LATINIME_PATRICIA_TRIE_READING_UTILS_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/core/dictionary/byte_array_utils.h" + +namespace latinime { + +class PatriciaTrieReadingUtils { + public: + typedef uint8_t NodeFlags; + + static AK_FORCE_INLINE int getGroupCountAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { + const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + if (firstByte < 0x80) { + return firstByte; + } else { + return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( + buffer, pos); + } + } + + static AK_FORCE_INLINE NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + } + + static AK_FORCE_INLINE int getCodePointAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); + } + + // Returns the number of read characters. + static AK_FORCE_INLINE int getCharsAndAdvancePosition(const uint8_t *const buffer, + const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { + int length = 0; + if (hasMultipleChars(flags)) { + length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, + pos); + } else { + if (maxLength > 0) { + outBuffer[0] = getCodePointAndAdvancePosition(buffer, pos); + length = 1; + } + } + return length; + } + + // Returns the number of skipped characters. 
+ static AK_FORCE_INLINE int skipCharacters(const uint8_t *const buffer, const NodeFlags flags, + const int maxLength, int *const pos) { + if (hasMultipleChars(flags)) { + return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); + } else { + if (maxLength > 0) { + getCodePointAndAdvancePosition(buffer, pos); + return 1; + } else { + return 0; + } + } + } + + static AK_FORCE_INLINE int readProbabilityAndAdvancePosition(const uint8_t *const buffer, + int *const pos) { + return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); + } + + static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, + const NodeFlags flags, int *const pos); + + /** + * Node Flags + */ + static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) { + return (flags & FLAG_IS_BLACKLISTED) != 0; + } + + static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) { + return (flags & FLAG_IS_NOT_A_WORD) != 0; + } + + static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) { + return (flags & FLAG_IS_TERMINAL) != 0; + } + + static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) { + return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0; + } + + static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) { + return (flags & FLAG_HAS_BIGRAMS) != 0; + } + + static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) { + return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0; + } + + static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) { + return FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils); + + static const NodeFlags MASK_GROUP_ADDRESS_TYPE; + static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_NOADDRESS; + static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_ONEBYTE; + static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_TWOBYTES; + static const NodeFlags FLAG_GROUP_ADDRESS_TYPE_THREEBYTES; + + static const NodeFlags 
FLAG_HAS_MULTIPLE_CHARS; + static const NodeFlags FLAG_IS_TERMINAL; + static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS; + static const NodeFlags FLAG_HAS_BIGRAMS; + static const NodeFlags FLAG_IS_NOT_A_WORD; + static const NodeFlags FLAG_IS_BLACKLISTED; +}; +} // namespace latinime +#endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */