From b3691b5642e2f0eb44d826fbade71869a1d229f9 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 31 Oct 2013 11:01:41 -0700 Subject: [PATCH] Implement ver4 dict PtNode reading methods. Bug: 11073222 Change-Id: I03af717ff02a621d9be8eb554d8f1438a500339b --- native/jni/Android.mk | 1 + .../structure/pt_common/pt_node_params.h | 22 ++++- .../v3/dynamic_patricia_trie_node_reader.h | 2 +- .../structure/v4/ver4_dict_constants.cpp | 2 +- .../structure/v4/ver4_dict_constants.h | 2 +- .../v4/ver4_patricia_trie_node_reader.cpp | 95 +++++++++++++++++++ .../v4/ver4_patricia_trie_node_reader.h | 58 +++++++++++ .../v4/ver4_patricia_trie_policy.cpp | 91 +++++++++++++++--- .../structure/v4/ver4_patricia_trie_policy.h | 9 +- .../v4/ver4_patricia_trie_reading_utils.cpp | 6 ++ .../v4/ver4_patricia_trie_reading_utils.h | 3 + .../latin/Ver4BinaryDictionaryTests.java | 30 ++++++ 12 files changed, 302 insertions(+), 19 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index f4c8f7a40..d73862eb6 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -87,6 +87,7 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_constants.cpp \ + ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_policy.cpp \ ver4_patricia_trie_reading_utils.cpp ) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 7bdd829cd..edc7b954c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -32,7 +32,7 @@ class PtNodeParams { // Invalid PtNode. PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS), - mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), @@ -53,6 +53,7 @@ class PtNodeParams { memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); } + // PtNode without terminal id. PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const int codePointCount, const int *const codePoints, const int probabilityFieldPos, const int probability, const int childrenPosFieldPos, @@ -60,7 +61,8 @@ class PtNodeParams { const int bigramPos, const int siblingPos) : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), - mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), + mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mProbabilityFieldPos(probabilityFieldPos), mProbability(probability), mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos), @@ -68,6 +70,22 @@ class PtNodeParams { memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); } + // PtNode with a terminal id. + PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, + const int parentPos, const int codePointCount, const int *const codePoints, + const int terminalIdFieldPos, const int terminalId, const int probability, + const int childrenPosFieldPos, const int childrenPos, const int bigramLinkedNodePos, + const int siblingPos) + : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos), + mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(terminalId), + mBigramPos(terminalId), mSiblingPos(siblingPos) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + AK_FORCE_INLINE bool isValid() const { return mCodePointCount > 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h index b5abffeda..ef0067b48 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h @@ -55,7 +55,7 @@ class DynamicPatriciaTrieNodeReader : public PtNodeReader { const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; - const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 160259afc..3e21399fd 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -30,7 +30,7 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = ".shortcut_index_shortcut"; -const int Ver4DictConstants::NOT_A_TERMINAL = -1; +const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index dcc36d1dc..47a6990f8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -34,7 +34,7 @@ class Ver4DictConstants { static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; - static const int NOT_A_TERMINAL; + static const int NOT_A_TERMINAL_ID; static const int PROBABILITY_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..b18bdc943 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" + +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mBuffer->getTailPosition()); + ASSERT(false); + return PtNodeParams(); + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int pos = ptNodePos; + const int headPos = ptNodePos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( + dictBuf, &pos); + const int parentPos = + DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos); + int codePoints[MAX_WORD_LENGTH]; + const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); + int terminalIdFieldPos = NOT_A_DICT_POS; + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + int probability = NOT_A_PROBABILITY; + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + terminalIdFieldPos = pos; + if (usesAdditionalBuffer) { + terminalIdFieldPos += mBuffer->getOriginalBufferSize(); + } + terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); + probability = mProbabilityDictContent->getProbability(terminalId); + } + int childrenPosFieldPos = pos; + if (usesAdditionalBuffer) { + childrenPosFieldPos += mBuffer->getOriginalBufferSize(); + } + int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( + dictBuf, &pos); + if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { + childrenPos += mBuffer->getOriginalBufferSize(); + } + int newBigramLinkedNodePos = bigramLinkedNodePos; + if (siblingNodePos == NOT_A_DICT_POS) { + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + newBigramLinkedNodePos = childrenPos; + } + } + if (usesAdditionalBuffer) { + pos += mBuffer->getOriginalBufferSize(); + } + // Sibling position is the tail position of original PtNode. + int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos; + // Read destination node if the read node is a moved node. + if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) { + // The destination position is stored at the same place as the parent position. + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos, + newBigramLinkedNodePos); + } else { + return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, + terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, + newBigramLinkedNodePos, newSiblingNodePos); + } +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h new file mode 100644 index 000000000..bdae0de7e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H + +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class ProbabilityDictContent; + +/* + * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved + * node and reads node attributes including probability form probabilityBuffer. + */ +class Ver4PatriciaTrieNodeReader : public PtNodeReader { + public: + Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, + const ProbabilityDictContent *const probabilityDictContent) + : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent) {} + + ~Ver4PatriciaTrieNodeReader() {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, + NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */); + } + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); + + const BufferWithExtendableBuffer *const mBuffer; + const ProbabilityDictContent *const mProbabilityDictContent; + + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int siblingNodePos, const int bigramLinkedNodePos) const; +}; +} // namespace latinime +#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index b9ee4891c..33f738413 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -16,45 +16,110 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + namespace latinime { void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const childDicNodes) const { - // TODO: Implement. + if (!dicNode->hasChildren()) { + return; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); + while (!readingHelper.isEnd()) { + const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); + if (!ptNodeParams.isValid()) { + break; + } + bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); + if (isTerminal && mHeaderPolicy.isDecayingDict()) { + // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose + // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a + // valid terminal DicNode. + isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY) + != NOT_A_PROBABILITY; + } + childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.hasChildren(), + ptNodeParams.isBlacklisted() + || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, + ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + readingHelper.readNextSiblingNode(ptNodeParams); + } } int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) const { - // TODO: Implement. - return 0; + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodePos(ptNodePos); + return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( + maxCodePointCount, outCodePoints, outUnigramProbability); } int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { - // TODO: Implement. - return NOT_A_DICT_POS; + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); } int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, const int bigramProbability) const { - // TODO: Implement. - return NOT_A_PROBABILITY; + if (mHeaderPolicy.isDecayingDict()) { + // Both probabilities are encoded. Decode them and get probability. + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); + } else { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + // bigramProbability is a bigram probability delta. + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } + } } int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_PROBABILITY; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_PROBABILITY; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { + return NOT_A_PROBABILITY; + } + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_DICT_POS; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return ptNodeParams.getTerminalId(); } int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { - // TODO: Implement. - return NOT_A_DICT_POS; + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return ptNodeParams.getTerminalId(); } bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index f7bfb3b0d..2f577f741 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -21,6 +21,7 @@ #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -33,7 +34,11 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) : mBuffers(buffers), - mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4) {}; + mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4), + mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), + mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -91,6 +96,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; + BufferWithExtendableBuffer mDictBuffer; + Ver4PatriciaTrieNodeReader mNodeReader; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp index 3304f1521..fd8a3ba55 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp @@ -18,9 +18,15 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { +/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition( + const uint8_t *const buffer, int *pos) { + return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); +} + /* static */ int Ver4PatriciaTrieReadingUtils::getProbability( const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId) { int pos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h index ee369d1e1..19e6dd331 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h @@ -27,6 +27,9 @@ class BufferWithExtendableBuffer; class Ver4PatriciaTrieReadingUtils { public: + static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, + int *const pos); + static int getProbability(const BufferWithExtendableBuffer *const probabilityBuffer, const int terminalId); diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index 25fb1c2a8..f48a4296a 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.Locale; +// TODO: Add a test to evaluate the speed of operations of Ver4 dictionary. @LargeTest public class Ver4BinaryDictionaryTests extends AndroidTestCase { private static final String TAG = Ver4BinaryDictionaryTests.class.getSimpleName(); @@ -90,4 +91,33 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); assertTrue(binaryDictionary.isValidDictionary()); } + + // TODO: Add large tests. + public void testReadProbability() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + + final int frequency = 100; + dict.add("a", frequency, null, false /* isNotAWord */); + dict.add("aaa", frequency, null, false /* isNotAWord */); + dict.add("ab", frequency, null, false /* isNotAWord */); + + DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(frequency, binaryDictionary.getFrequency("a")); + assertEquals(frequency, binaryDictionary.getFrequency("aaa")); + assertEquals(frequency, binaryDictionary.getFrequency("ab")); + } }