From b460c85589e685634cb06f3410317b7d3b376b30 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 6 Nov 2013 18:09:35 +0900 Subject: [PATCH] Implement ver4 shortcut reading method. Bug: 11073222 Change-Id: I3738c7ce07f500920bde8d3f985cf6e8ecb40b6a --- .../shortcut/ver4_shortcut_list_policy.h | 71 +++++++++++++++++++ .../v4/content/bigram_dict_content.h | 2 +- .../v4/content/shortcut_dict_content.h | 63 ++++++++++++++++ .../structure/v4/ver4_dict_buffers.h | 16 ++--- .../structure/v4/ver4_dict_constants.cpp | 4 +- .../structure/v4/ver4_dict_constants.h | 5 +- .../v4/ver4_patricia_trie_policy.cpp | 3 +- .../structure/v4/ver4_patricia_trie_policy.h | 6 +- .../utils/buffer_with_extendable_buffer.cpp | 15 +++- .../utils/buffer_with_extendable_buffer.h | 3 + 10 files changed, 173 insertions(+), 15 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h new file mode 100644 index 000000000..12d3579fd --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+#define LATINIME_VER4_SHORTCUT_LIST_POLICY_H
+
+#include
+
+#include "defines.h"
+#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
+
+namespace latinime {
+
+class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
+ public:
+    Ver4ShortcutListPolicy(const ShortcutDictContent *const shortcutDictContent,
+            const TerminalPositionLookupTable *const terminalPositionLookupTable)
+            : mShortcutDictContent(shortcutDictContent),
+              mTerminalPositionLookupTable(terminalPositionLookupTable) {}
+
+    ~Ver4ShortcutListPolicy() {}
+
+    // A shortcut list starts with its first entry, so the head position is returned as is.
+    int getStartPos(const int pos) const { return pos; }
+
+    // Reads the entry at *pos and advances *pos only when both code point outputs are set;
+    // otherwise nothing is read and outHasNext / outIsWhitelist are derived from flags of 0.
+    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
+            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
+            int *const pos) const {
+        int flags = 0;
+        const bool hasTargetOutputs = outCodePoint && outCodePointCount;
+        if (hasTargetOutputs) {
+            mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
+                    outCodePoint, outCodePointCount, &flags, pos);
+        }
+        if (outHasNext) {
+            *outHasNext = ShortcutListReadingUtils::hasNext(flags);
+        }
+        if (outIsWhitelist) {
+            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
+        }
+    }
+
+    // Intentionally empty: ver4 dictionaries do not need to skip shortcut lists.
+    void skipAllShortcuts(int *const pos) const {}
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
+
+    const ShortcutDictContent *const mShortcutDictContent;
+    const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
+};
+} // namespace latinime
+#endif // LATINIME_VER4_SHORTCUT_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
index 634c1f08e..5eed13e70 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
@@ -38,7 +38,7 @@ class BigramDictContent : public SparseTableDictContent {
         const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
         if (outBigramFlags) {
             *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition(
-                    Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE, bigramEntryPos);
+                    Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
         }
         if (outTargetTerminalId) {
             *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
new file mode 100644
index 000000000..c10fbcb2a
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
+#define LATINIME_SHORTCUT_DICT_CONTENT_H
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
+
+namespace latinime {
+
+class ShortcutDictContent : public SparseTableDictContent {
+ public:
+    ShortcutDictContent(const char *const dictDirPath, const bool isUpdatable)
+            : SparseTableDictContent(dictDirPath,
+                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
+                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+
+    // Reads the flags and the shortcut target at *shortcutEntryPos, each only if its output is
+    // set. NOTE(review): a field whose output is null is not read, so it is not skipped either.
+    void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
+            int *const shortcutEntryPos) const {
+        const BufferWithExtendableBuffer *const contentBuffer = getContentBuffer();
+        if (outShortcutFlags) {
+            *outShortcutFlags = contentBuffer->readUintAndAdvancePosition(
+                    Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
+        }
+        if (outCodePoint && outCodePointCount) {
+            contentBuffer->readCodePointsAndAdvancePosition(
+                    maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
+        }
+    }
+
+    // Looks up the head position of the shortcut list for terminalId; NOT_A_DICT_POS if absent.
+    int getShortcutListHeadPos(const int terminalId) const {
+        const SparseTable *const lookupTable = getAddressLookupTable();
+        return lookupTable->contains(terminalId)
+                ? static_cast<int>(lookupTable->get(terminalId)) : NOT_A_DICT_POS;
+    }
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
+};
+} // namespace latinime
+#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 4e10403f3..6ee6e63e4 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -21,8 +21,7 @@
 #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
-#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
+#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -66,6 +65,10 @@ class Ver4DictBuffers {
         return &mBigramDictContent;
     }
 
+    AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
+        return &mShortcutDictContent;
+    }
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
 
@@ -77,18 +80,13 @@ class Ver4DictBuffers {
                       HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
               mProbabilityDictContent(dictDirPath, isUpdatable),
               mBigramDictContent(dictDirPath, isUpdatable),
-              mShortcutDictContent(dictDirPath,
-                      Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
-                      Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
-                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
-                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
-                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
+              mShortcutDictContent(dictDirPath, isUpdatable) {}
 
     const MmappedBuffer::MmappedBufferPtr mDictBuffer;
     TerminalPositionLookupTable mTerminalPositionLookupTable;
     ProbabilityDictContent mProbabilityDictContent;
     BigramDictContent mBigramDictContent;
-    SparseTableDictContent mShortcutDictContent;
+    ShortcutDictContent mShortcutDictContent;
 };
 } // namespace latinime
 #endif /* LATINIME_VER4_DICT_BUFFER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index fb29c0c4a..20adb927f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -41,6 +41,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
 const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
 
 const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
-const int Ver4DictConstants::BIGRAM_FRAGS_FIELD_SIZE = 1;
+const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
+
+const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
 
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index a0bebb75f..522581873 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -44,8 +44,11 @@
class Ver4DictConstants {
     static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
     static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
 
-    static const int BIGRAM_FRAGS_FIELD_SIZE;
+    static const int BIGRAM_FLAGS_FIELD_SIZE;
     static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
+
+    static const int SHORTCUT_FLAGS_FIELD_SIZE;
+
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
 };
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
index 43ad301db..ae5a094d1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
@@ -108,7 +108,8 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
     if (ptNodeParams.isDeleted()) {
         return NOT_A_DICT_POS;
     }
-    return ptNodeParams.getTerminalId();
+    return mBuffers.get()->getShortcutDictContent()->getShortcutListHeadPos(
+            ptNodeParams.getTerminalId());
 }
 
 int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
index d0be77d0b..10b9125f0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
@@ -21,6 +21,7 @@
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
 #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
 #include "suggest/policyimpl/dictionary/header/header_policy.h"
+#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
@@ -41,6 +42,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
                       BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
               mBigramPolicy(mBuffers.get()->getBigramDictContent(),
                       mBuffers.get()->getTerminalPositionLookupTable()),
+              mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
+                      mBuffers.get()->getTerminalPositionLookupTable()),
               mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
 
     AK_FORCE_INLINE int getRootPosition() const {
@@ -74,7 +77,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     }
 
     const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
-        return 0;
+        return &mShortcutPolicy;
     }
 
     bool addUnigramWord(const int *const word, const int length, const int probability);
@@ -101,6 +104,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     const HeaderPolicy mHeaderPolicy;
     BufferWithExtendableBuffer mDictBuffer;
     const Ver4BigramListPolicy mBigramPolicy;
+    const Ver4ShortcutListPolicy mShortcutPolicy;
     Ver4PatriciaTrieNodeReader mNodeReader;
 };
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index ead2212a0..f17a0d1c0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -36,6 +36,23 @@ uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
     return value;
 }
 
+// Reads code points starting at *pos into outCodePoints, stores the value returned by the
+// reader in *outCodePointCount, and leaves *pos advanced past the data that was read.
+void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxCodePointCount,
+        int *const outCodePoints, int *outCodePointCount, int *const pos) const {
+    const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
+    if (readingPosIsInAdditionalBuffer) {
+        // Subtract the original buffer size before reading; it is added back below.
+        *pos -= mOriginalBufferSize;
+    }
+    // The code points go to outCodePoints; only the return value goes to outCodePointCount.
+    *outCodePointCount = ByteArrayUtils::readStringAndAdvancePosition(
+            getBuffer(readingPosIsInAdditionalBuffer), maxCodePointCount, outCodePoints, pos);
+    if (readingPosIsInAdditionalBuffer) {
+        *pos += mOriginalBufferSize;
+    }
+}
+
 bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data,
         const int size, int *const pos) {
     if (!(size >= 1 && size <= 4)) {
@@ -59,7 +76,7 @@ bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data
 }
 
 bool BufferWithExtendableBuffer::writeCodePointsAndAdvancePosition(const int *const codePoints,
-        const int codePointCount, const bool writesTerminator ,int *const pos) {
+        const int codePointCount, const bool writesTerminator, int *const pos) {
     const size_t size = ByteArrayUtils::calculateRequiredByteCountToStoreCodePoints(
             codePoints, codePointCount, writesTerminator);
     if (!checkAndPrepareWriting(*pos, size)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 5c1b4cd01..13dce9b61 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -75,6 +75,9 @@ class BufferWithExtendableBuffer {
 
     uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
 
+    void readCodePointsAndAdvancePosition(const int maxCodePointCount,
+            int *const outCodePoints, int *outCodePointCount, int *const pos) const;
+
     AK_FORCE_INLINE int getOriginalBufferSize() const {
         return mOriginalBufferSize;
     }