From c5e6efafff56c57c5527fe64dddb851df0719634 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Fri, 28 Jun 2013 19:47:00 +0900 Subject: [PATCH] Introduce patriciaTrie to abstract traversing version 2 dictionary. Bug: 6669677 Change-Id: Ifef72f3d7a7ba67c5232b98c7835485d72d7322d --- native/jni/Android.mk | 1 + .../suggest/core/dicnode/dic_node_utils.cpp | 6 +- .../core/dictionary/bigram_dictionary.cpp | 11 ++- .../core/dictionary/binary_dictionary_info.h | 16 +++-- .../suggest/core/dictionary/dictionary.cpp | 21 ++---- .../dictionary_structure_policy_factory.h | 47 +++++++++++++ .../dictionary/patricia_trie_policy.cpp | 70 +++++++++++++++++++ .../dictionary/patricia_trie_policy.h | 58 +++++++++++++++ 8 files changed, 198 insertions(+), 32 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index d5df6b62e..f89eea735 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -70,6 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \ proximity_info_state_utils.cpp) \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ + suggest/policyimpl/dictionary/patricia_trie_policy.cpp \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ $(addprefix suggest/policyimpl/typing/, \ scoring_params.cpp \ diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 2063c39ee..9bf7eceb5 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -26,6 +26,7 @@ #include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/layout/proximity_info.h" #include 
"suggest/core/layout/proximity_info_state.h" +#include "suggest/core/policy/dictionary_structure_policy.h" #include "utils/char_utils.h" namespace latinime { @@ -36,14 +37,15 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int prevWordNodePos, DicNode *const newRootNode) { - newRootNode->initAsRoot(binaryDictionaryInfo->getRootPosition(), prevWordNodePos); + newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(), + prevWordNodePos); } /*static */ void DicNodeUtils::initAsRootWithPreviousWord( const BinaryDictionaryInfo *const binaryDictionaryInfo, DicNode *const prevWordLastNode, DicNode *const newRootNode) { newRootNode->initAsRootWithPreviousWord( - prevWordLastNode, binaryDictionaryInfo->getRootPosition()); + prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition()); } /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 242a9bdd6..ff304d2b2 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -150,11 +150,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return 0; - const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot(); - int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength, - forceLowerCaseSearch); - + int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_VALID_WORD == pos) return 0; + const uint8_t *const root = 
mBinaryDictionaryInfo->getDictRoot(); const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { @@ -189,8 +188,8 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (0 == pos) return false; - int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(), - word1, length1, false /* forceLowerCaseSearch */); + int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( + mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == nextWordPos) return false; for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index c92123679..7cb31440a 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -22,11 +22,10 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_header.h" +#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" namespace latinime { -class BinaryDictionaryHeader; - class BinaryDictionaryInfo { public: BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd, @@ -35,7 +34,9 @@ class BinaryDictionaryInfo { mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable), mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictBuf, mDictSize)), - mDictionaryHeader(this), mDictRoot(mDictBuf + 
mDictionaryHeader.getSize()) {} + mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), + mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( + mDictionaryFormat)) {} AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -61,10 +62,6 @@ class BinaryDictionaryInfo { return mDictionaryFormat; } - AK_FORCE_INLINE int getRootPosition() const { - return 0; - } - AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { return &mDictionaryHeader; } @@ -75,6 +72,10 @@ class BinaryDictionaryInfo { return mIsUpdatable && isUpdatableDictionaryFormat; } + AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const { + return mStructurePolicy; + } + private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo); @@ -86,6 +87,7 @@ class BinaryDictionaryInfo { const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; const BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; + const DictionaryStructurePolicy *const mStructurePolicy; }; } #endif /* LATINIME_BINARY_DICTIONARY_INFO_H */ diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 51f23dc55..675b54972 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -83,27 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in } int Dictionary::getProbability(const int *word, int length) const { - const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot(); - int pos = BinaryFormat::getTerminalPosition(root, word, length, + const DictionaryStructurePolicy *const structurePolicy = + mBinaryDictionaryInfo.getStructurePolicy(); + int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == pos) { return NOT_A_PROBABILITY; } - const uint8_t flags 
= BinaryFormat::getFlagsAndForwardPointer(root, &pos); - if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { - // If this is not a word, or if it's a blacklisted entry, it should behave as - // having no probability outside of the suggestion process (where it should be used - // for shortcuts). - return NOT_A_PROBABILITY; - } - const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); - if (hasMultipleChars) { - pos = BinaryFormat::skipOtherCharacters(root, pos); - } else { - BinaryFormat::getCodePointAndForwardPointer(root, &pos); - } - const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos); - return unigramProbability; + return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos); } bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h new file mode 100644 index 000000000..5070651cb --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H +#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" + +namespace latinime { + +class DictionaryStructurePolicy; + +class DictionaryStructurePolicyFactory { + public: + static const DictionaryStructurePolicy *getDictionaryStructurePolicy( + const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { + switch (dictionaryFormat) { + case BinaryDictionaryFormatUtils::VERSION_1: + // Fall through + case BinaryDictionaryFormatUtils::VERSION_2: + return PatriciaTriePolicy::getInstance(); + default: + ASSERT(false); + return 0; + } + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory); +}; +} // namespace latinime +#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp new file mode 100644 index 000000000..c995af98a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include "suggest/policyimpl/dictionary/patricia_trie_policy.h" + +#include "defines.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" +#include "suggest/core/dictionary/binary_format.h" + +namespace latinime { + +const PatriciaTriePolicy PatriciaTriePolicy::sInstance; + +void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { + // TODO: Move children creating methods from DicNodeUtils. +} + +void PatriciaTriePolicy::getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int terminalNodePos, const int maxDepth, int *const outWord, + int *const outUnigramProbability) const { + BinaryFormat::getWordAtAddress(binaryDictionaryInfo->getDictRoot(), terminalNodePos, + maxDepth, outWord, outUnigramProbability); +} + +int PatriciaTriePolicy::getTerminalNodePositionOfWord( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord, + length, forceLowerCaseSearch); +} + +int PatriciaTriePolicy::getUnigramProbability( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const { + const uint8_t *const root = binaryDictionaryInfo->getDictRoot(); + int pos = nodePos; + const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); + if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) { + // If this is not a word, or if it's a blacklisted entry, it should behave as + // having no probability outside of the suggestion process (where it should be used + // for shortcuts). 
+ return NOT_A_PROBABILITY; + } + const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); + if (hasMultipleChars) { + pos = BinaryFormat::skipOtherCharacters(root, pos); + } else { + BinaryFormat::getCodePointAndForwardPointer(root, &pos); + } + return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h new file mode 100644 index 000000000..9b9338145 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_PATRICIA_TRIE_POLICY_H +#define LATINIME_PATRICIA_TRIE_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_policy.h" + +namespace latinime { + +class PatriciaTriePolicy : public DictionaryStructurePolicy { + public: + static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() { + return &sInstance; + } + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildNodes(const DicNode *const dicNode, + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; + + void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int terminalNodePos, const int maxDepth, int *const outWord, + int *const outUnigramProbability) const; + + int getTerminalNodePositionOfWord( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, + const int nodePos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy); + static const PatriciaTriePolicy sInstance; + + PatriciaTriePolicy() {} + ~PatriciaTriePolicy() {} +}; +} // namespace latinime +#endif // LATINIME_PATRICIA_TRIE_POLICY_H