From 2b1dd6e2532ee041248c3f7c48f28d789713b18b Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Tue, 6 Aug 2013 10:18:20 +0900 Subject: [PATCH] Implement a part of ver 3 dictionary reading methods. Bug: 6669677 Change-Id: I83c159838ad99b2093907b9de7df6cb7a97b4165 --- native/jni/Android.mk | 2 + .../dynamic_patricia_trie_node_reader.cpp | 74 ++++++++++ .../dynamic_patricia_trie_node_reader.h | 134 ++++++++++++++++++ .../dynamic_patricia_trie_policy.cpp | 53 ++++++- 4 files changed, 256 insertions(+), 7 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index acd230ff2..e14cf5a71 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -71,7 +71,9 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ + dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_policy.cpp \ + dynamic_patricia_trie_reading_utils.cpp \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..20cda91a3 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" + +#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" + +namespace latinime { + +void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, + const int maxCodePointCount, int *const outCodePoints) { + const uint8_t *const dictRoot = mBinaryDictionaryInfo->getDictRoot(); + int pos = nodePos; + mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); + mParentPos = DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictRoot, &pos); + if (outCodePoints != 0) { + mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); + } else { + mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( + dictRoot, mFlags, MAX_WORD_LENGTH, &pos); + } + if (isTerminal()) { + mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); + } else { + mProbability = NOT_A_PROBABILITY; + } + if (hasChildren()) { + mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictRoot, mFlags, &pos); + } else { + mChildrenPos = NOT_A_DICT_POS; + } + if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { + mShortcutPos = pos; + 
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); + } else { + mShortcutPos = NOT_A_DICT_POS; + } + if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { + mBigramPos = pos; + BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( + mBinaryDictionaryInfo, &pos); + } else { + mBigramPos = NOT_A_DICT_POS; + } + // Only set the sibling position while it is still unset, so the recursive read below keeps it. + if (mSiblingPos == NOT_A_VALID_WORD_POS) { + // The sibling starts where this node ends: pos has been advanced past all of its fields. + mSiblingPos = pos; + } + // If the node that was just read is a moved node, read its destination node instead. + if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { + // The destination position is stored at the same place as the parent position. + fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints); + } +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h new file mode 100644 index 000000000..b668aab78 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" + +namespace latinime { + +class BinaryDictionaryInfo; + +/* + * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved + * node and reads node attributes. + */ +class DynamicPatriciaTrieNodeReader { + public: + explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo) + : mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS), + mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), + mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), + mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), + mSiblingPos(NOT_A_VALID_WORD_POS) {} + + ~DynamicPatriciaTrieNodeReader() {} + + // Reads the node at nodePos from the dictionary buffer and updates members accordingly. 
+ AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) { + fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, 0 /* maxCodePointCount */, + 0 /* outCodePoints */); + } + + AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, + const int maxCodePointCount, int *const outCodePoints) { + mNodePos = nodePos; + mSiblingPos = NOT_A_VALID_WORD_POS; + fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints); + } + + AK_FORCE_INLINE int getNodePos() const { + return mNodePos; + } + + // Flags + AK_FORCE_INLINE bool isDeleted() const { + return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags); + } + + AK_FORCE_INLINE bool hasChildren() const { + return PatriciaTrieReadingUtils::hasChildrenInFlags(mFlags); + } + + AK_FORCE_INLINE bool isTerminal() const { + return PatriciaTrieReadingUtils::isTerminal(mFlags); + } + + AK_FORCE_INLINE bool isBlacklisted() const { + return PatriciaTrieReadingUtils::isBlacklisted(mFlags); + } + + AK_FORCE_INLINE bool isNotAWord() const { + return PatriciaTrieReadingUtils::isNotAWord(mFlags); + } + + // Parent node position + AK_FORCE_INLINE int getParentPos() const { + return mParentPos; + } + + // Number of code points + AK_FORCE_INLINE uint8_t getCodePointCount() const { + return mCodePointCount; + } + + // Probability + AK_FORCE_INLINE int getProbability() const { + return mProbability; + } + + // Children node group position + AK_FORCE_INLINE int getChildrenPos() const { + return mChildrenPos; + } + + // Shortcut list position + AK_FORCE_INLINE int getShortcutPos() const { + return mShortcutPos; + } + + // Bigrams position + AK_FORCE_INLINE int getBigramsPos() const { + return mBigramPos; + } + + // Sibling node position + AK_FORCE_INLINE int getSiblingNodePos() const { + return mSiblingPos; + } + + private: + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); + + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + int mNodePos; + 
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; + int mParentPos; + uint8_t mCodePointCount; + int mProbability; + int mChildrenPos; + int mShortcutPos; + int mBigramPos; + int mSiblingPos; + + void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, + int *const outCodePoints); +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index c7314ecf1..17cbdde3a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -20,6 +20,9 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" namespace latinime { @@ -28,7 +31,31 @@ const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance; void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, const BinaryDictionaryInfo *const binaryDictionaryInfo, const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { - // TODO: Implement. 
+ if (!dicNode->hasChildren()) { + return; + } + DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + int mergedNodeCodePoints[MAX_WORD_LENGTH]; + int nextPos = dicNode->getChildrenPos(); + do { + const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( + binaryDictionaryInfo->getDictRoot(), &nextPos); + for (int i = 0; i < childCount; i++) { + nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nextPos, MAX_WORD_LENGTH, + mergedNodeCodePoints); + if (!nodeReader.isDeleted() && !nodeFilter->isFilteredOut(mergedNodeCodePoints[0])) { + // Push the child node only when it is neither deleted nor filtered out. + childDicNodes->pushLeavingChild(dicNode, nodeReader.getNodePos(), + nodeReader.getChildrenPos(), nodeReader.getProbability(), + nodeReader.isTerminal(), nodeReader.hasChildren(), + nodeReader.isBlacklisted() || nodeReader.isNotAWord(), + nodeReader.getCodePointCount(), mergedNodeCodePoints); + } + nextPos = nodeReader.getSiblingNodePos(); + } + nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( + binaryDictionaryInfo->getDictRoot(), nextPos); + } while(DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos)); } int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( @@ -48,22 +75,34 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( int DynamicPatriciaTriePolicy::getUnigramProbability( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - // TODO: Implement. - return NOT_A_PROBABILITY; + DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + nodeReader.fetchNodeInfoFromBuffer(nodePos); + if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { + return NOT_A_PROBABILITY; + } + return nodeReader.getProbability(); } int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - // TODO: Implement. 
- return NOT_A_DICT_POS; + DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + nodeReader.fetchNodeInfoFromBuffer(nodePos); + if (nodeReader.isDeleted()) { + return NOT_A_DICT_POS; + } + return nodeReader.getShortcutPos(); } int DynamicPatriciaTriePolicy::getBigramsPositionOfNode( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { - // TODO: Implement. - return NOT_A_DICT_POS; + DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); + nodeReader.fetchNodeInfoFromBuffer(nodePos); + if (nodeReader.isDeleted()) { + return NOT_A_DICT_POS; + } + return nodeReader.getBigramsPos(); } } // namespace latinime