From 1c62341de6c4cb72d48c48dfe04ed4113212b877 Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi
Date: Fri, 1 Nov 2013 11:03:41 -0700
Subject: [PATCH] Implement SparseTable in native code.

Bug: 11073222
Change-Id: Ia2cbb2fecfae0c9f621600483d96a55aef75c1b8
---
 native/jni/Android.mk                         |  3 +-
 .../v4/content/sparse_table_dict_content.h    |  9 +++-
 .../structure/v4/ver4_dict_buffers.h          |  8 ++-
 .../structure/v4/ver4_dict_constants.cpp      |  5 ++
 .../structure/v4/ver4_dict_constants.h        |  4 ++
 .../utils/buffer_with_extendable_buffer.cpp   | 17 +++---
 .../utils/buffer_with_extendable_buffer.h     |  2 +
 .../dictionary/utils/byte_array_utils.h       | 12 ++---
 .../dictionary/utils/sparse_table.cpp         | 42 +++++++++++++++
 .../dictionary/utils/sparse_table.h           | 52 +++++++++++++++++++
 10 files changed, 134 insertions(+), 20 deletions(-)
 create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
 create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h

diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index d73862eb6..333688015 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -96,7 +96,8 @@ LATIN_IME_CORE_SRC_FILES := \
         dict_file_writing_utils.cpp \
         forgetting_curve_utils.cpp \
         format_utils.cpp \
-        mmapped_buffer.cpp) \
+        mmapped_buffer.cpp \
+        sparse_table.cpp) \
     suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
     $(addprefix suggest/policyimpl/typing/, \
         scoring_params.cpp \
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
index 446b51ed0..4836d8688 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
@@ -21,6 +21,7 @@
 #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
 #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
 
 namespace latinime {
 
@@ -29,7 +30,8 @@ class SparseTableDictContent : public DictContent {
  public:
     AK_FORCE_INLINE SparseTableDictContent(const char *const dictDirPath,
             const char *const lookupTableFileName, const char *const addressTableFileName,
-            const char *const contentFileName, const bool isUpdatable)
+            const char *const contentFileName, const bool isUpdatable,
+            const int sparseTableBlockSize, const int sparseTableDataSize)
             : mLookupTableBuffer(
                       MmappedBuffer::openBuffer(dictDirPath, lookupTableFileName, isUpdatable)),
               mAddressTableBuffer(
@@ -45,7 +47,9 @@ class SparseTableDictContent : public DictContent {
                       BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
               mExpandableContentBuffer(mContentBuffer.get() ? mContentBuffer.get()->getBuffer() : 0,
                       mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
-                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
+                      BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
+              mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
+                      sparseTableBlockSize, sparseTableDataSize) {}
 
     virtual ~SparseTableDictContent() {}
 
@@ -64,6 +68,7 @@ class SparseTableDictContent : public DictContent {
     BufferWithExtendableBuffer mExpandableLookupTableBuffer;
     BufferWithExtendableBuffer mExpandableAddressTableBuffer;
     BufferWithExtendableBuffer mExpandableContentBuffer;
+    SparseTable mAddressLookupTable;
 };
 } // namespace latinime
 #endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
index 333a7c209..1f5503ccb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
@@ -67,11 +67,15 @@ class Ver4DictBuffers {
               mBigramDictContent(dictDirPath,
                       Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
                       Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
-                      Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable),
+                      Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
               mShortcutDictContent(dictDirPath,
                       Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
                       Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
-                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable) {}
+                      Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
+                      Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
 
     const MmappedBuffer::MmappedBufferPtr mDictBuffer;
     SingleDictContent mTerminalAddressTable;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
index 3e21399fd..6195d060b 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
@@ -34,4 +34,9 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
 const int Ver4DictConstants::PROBABILITY_SIZE = 1;
 const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
 
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
+const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
+const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
index 47a6990f8..b6be29ae9 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
@@ -38,6 +38,10 @@ class Ver4DictConstants {
     static const int PROBABILITY_SIZE;
     static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
 
+    static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
+    static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
+    static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
+    static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
 };
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
index e028de526..ead2212a0 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp
@@ -23,17 +23,16 @@ const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 9
 // TODO: Needs to allocate larger memory corresponding to the current vector size.
 const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
 
+uint32_t BufferWithExtendableBuffer::readUint(const int size, const int pos) const {
+    const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(pos);
+    const int posInBuffer = readingPosIsInAdditionalBuffer ? pos - mOriginalBufferSize : pos;
+    return ByteArrayUtils::readUint(getBuffer(readingPosIsInAdditionalBuffer), size, posInBuffer);
+}
+
 uint32_t BufferWithExtendableBuffer::readUintAndAdvancePosition(const int size,
         int *const pos) const {
-    const bool readingPosIsInAdditionalBuffer = isInAdditionalBuffer(*pos);
-    if (readingPosIsInAdditionalBuffer) {
-        *pos -= mOriginalBufferSize;
-    }
-    const int value = ByteArrayUtils::readUintAndAdvancePosition(
-            getBuffer(readingPosIsInAdditionalBuffer), size, pos);
-    if (readingPosIsInAdditionalBuffer) {
-        *pos += mOriginalBufferSize;
-    }
+    const int value = readUint(size, *pos);
+    *pos += size;
     return value;
 }
 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 2d89f71b1..5c1b4cd01 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -71,6 +71,8 @@ class BufferWithExtendableBuffer {
         }
     }
 
+    uint32_t readUint(const int size, const int pos) const;
+
     uint32_t readUintAndAdvancePosition(const int size, int *const pos) const;
 
     AK_FORCE_INLINE int getOriginalBufferSize() const {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
index 1ca01b868..ebdd523e1 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h
@@ -114,19 +114,19 @@ class ByteArrayUtils {
         return buffer[(*pos)++];
     }
 
-    static AK_FORCE_INLINE int readUintAndAdvancePosition(const uint8_t *const buffer,
-            const int size, int *const pos) {
+    static AK_FORCE_INLINE int readUint(const uint8_t *const buffer,
+            const int size, const int pos) {
         // size must be in 1 to 4.
         ASSERT(size >= 1 && size <= 4);
         switch (size) {
             case 1:
-                return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+                return ByteArrayUtils::readUint8(buffer, pos);
             case 2:
-                return ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+                return ByteArrayUtils::readUint16(buffer, pos);
             case 3:
-                return ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+                return ByteArrayUtils::readUint24(buffer, pos);
             case 4:
-                return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
+                return ByteArrayUtils::readUint32(buffer, pos);
             default:
                 return 0;
         }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
new file mode 100644
index 000000000..93ec70c1e
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
+
+namespace latinime {
+
+const int SparseTable::NOT_EXIST = -1;
+
+bool SparseTable::contains(const int id) const {
+    // Bound-check the index-table slot for id's block; one slot covers mBlockSize ids.
+    const int readingPos = (id / mBlockSize) * mDataSize;
+    if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) {
+        return false;
+    }
+    const int index = mIndexTableBuffer->readUint(mDataSize, readingPos);
+    return index != NOT_EXIST;
+}
+
+uint32_t SparseTable::get(const int id) const {
+    // id is not validated here; presumably callers check contains(id) first.
+    const int indexTableReadingPos = (id / mBlockSize) * mDataSize;
+    const int index = mIndexTableBuffer->readUint(mDataSize, indexTableReadingPos);
+    // Entry number = block start (index * mBlockSize) + offset in block; scale to bytes last.
+    const int contentTableReadingPos = (index * mBlockSize + id % mBlockSize) * mDataSize;
+    return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos);
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
new file mode 100644
index 000000000..d71756c63
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_SPARSE_TABLE_H
+#define LATINIME_SPARSE_TABLE_H
+
+#include <stdint.h>
+
+#include "defines.h"
+#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+// Note that there is a corresponding implementation in SparseTable.java.
+// TODO: Support multiple content buffers.
+class SparseTable {
+ public:
+    SparseTable(BufferWithExtendableBuffer *const indexTableBuffer,
+            BufferWithExtendableBuffer *const contentTableBuffer, const int blockSize,
+            const int dataSize)
+            : mIndexTableBuffer(indexTableBuffer), mContentTableBuffer(contentTableBuffer),
+              mBlockSize(blockSize), mDataSize(dataSize) {}
+    // False for negative/out-of-range ids and for blocks whose index entry is NOT_EXIST.
+    bool contains(const int id) const;
+    // Reads the mDataSize-byte content-table entry for id; performs no validation of id.
+    uint32_t get(const int id) const;
+
+ private:
+    DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable);
+
+    static const int NOT_EXIST;
+
+    BufferWithExtendableBuffer *const mIndexTableBuffer;
+    BufferWithExtendableBuffer *const mContentTableBuffer;
+    const int mBlockSize;
+    const int mDataSize;
+};
+} // namespace latinime
+#endif /* LATINIME_SPARSE_TABLE_H */