From 72877b15ea858946274e635604d21edd9593d71f Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 21 Oct 2013 19:56:50 +0900 Subject: [PATCH] Add utility methods related to buffers for ver4 dict. Bug: 11073222 Change-Id: Ieca15ffa09fd36c6602f6d2afab5779800c5a422 --- .../dictionary/header/header_policy.h | 4 +- .../structure/v2/patricia_trie_policy.h | 3 +- .../v3/dynamic_patricia_trie_policy.h | 6 ++- .../dynamic_patricia_trie_writing_helper.cpp | 9 ++-- .../utils/buffer_with_extendable_buffer.cpp | 2 +- .../utils/buffer_with_extendable_buffer.h | 11 +++- .../utils/dict_file_writing_utils.cpp | 6 ++- .../policyimpl/dictionary/utils/file_utils.h | 50 +++++++++++++++++++ .../dictionary/utils/mmapped_buffer.h | 37 +++++++++++++- 9 files changed, 113 insertions(+), 15 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index a9c7805a8..7c06a7117 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -30,8 +30,8 @@ namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: // Reads information from existing dictionary buffer. - HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) - : mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) + : mDictFormatVersion(formatVersion), mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 4b4c39dfa..31e6186b7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -34,7 +35,7 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_2), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 2c722e8ed..1a924c177 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -33,9 +34,10 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_3), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), - mBuffer->getBufferSize() - mHeaderPolicy.getSize()), + mBuffer->getBufferSize() - mHeaderPolicy.getSize(), + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), mShortcutListPolicy(&mBufferWithExtendableBuffer), mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index e149d6371..d856c50f4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -149,7 +149,8 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + mBuffer->getUsedAdditionalBufferSize(); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, @@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy) { - BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, - MAX_DICTIONARY_SIZE); + BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE); int unigramCount = 0; int bigramCount = 0; if (mNeedsToDecay) { @@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { return; } - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { return; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f692882f2..5032131ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -18,7 +18,7 @@ namespace latinime { -const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; +const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024; const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90; // TODO: Needs to allocate larger memory corresponding to the current vector size. const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 9dc34823c..1e27a1bec 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -32,12 +32,20 @@ namespace latinime { // raw pointer but provides several methods that handle boundary checking for writing data. class BufferWithExtendableBuffer { public: + static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE; + BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize, - const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE) + const int maxAdditionalBufferSize) : mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize), mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + // Without original buffer. + BufferWithExtendableBuffer(const int maxAdditionalBufferSize) + : mOriginalBuffer(0), mOriginalBufferSize(0), + mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0), + mMaxAdditionalBufferSize(maxAdditionalBufferSize) {} + AK_FORCE_INLINE int getTailPosition() const { return mOriginalBufferSize + mUsedAdditionalBufferSize; } @@ -86,7 +94,6 @@ class BufferWithExtendableBuffer { private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); - static const size_t MAX_ADDITIONAL_BUFFER_SIZE; static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE; static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index f65583ee4..b48e5b005 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -44,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { - BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap); headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); - BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + BufferWithExtendableBuffer bodyBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h new file mode 100644 index 000000000..59b894fa6 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_FILE_UTILS_H +#define LATINIME_FILE_UTILS_H + +#include +#include +#include +#include + +#include "defines.h" + +namespace latinime { + +class FileUtils { + public: + // Returns -1 on error. + static int getFileSize(const char *const filePath) { + const int fd = open(filePath, O_RDONLY); + if (fd == -1) { + return -1; + } + struct stat statBuf; + if (fstat(fd, &statBuf) != 0) { + close(fd); + return -1; + } + close(fd); + return static_cast(statBuf.st_size); + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); +}; +} // namespace latinime +#endif /* LATINIME_FILE_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h index 6b69116eb..82138355d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/mmapped_buffer.h @@ -18,18 +18,21 @@ #define LATINIME_MMAPPED_BUFFER_H #include +#include +#include #include #include #include #include #include "defines.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" namespace latinime { class MmappedBuffer { public: - static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset, + static MmappedBuffer *openBuffer(const char *const path, const int bufferOffset, const int bufferSize, const bool isUpdatable) { const int openMode = isUpdatable ? O_RDWR : O_RDONLY; const int mmapFd = open(path, openMode); @@ -59,7 +62,34 @@ class MmappedBuffer { isUpdatable); } + // Mmap entire file. + static MmappedBuffer *openBuffer(const char *const path, const bool isUpdatable) { + const int fileSize = FileUtils::getFileSize(path); + if (fileSize == -1) { + return 0; + } else if (fileSize == 0) { + return new MmappedBuffer(isUpdatable); + } else { + return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable); + } + } + + static MmappedBuffer *openBuffer(const char *const dirPath, const char *const fileName, + const bool isUpdatable) { + const int filePathBufferSize = PATH_MAX + 1 /* terminator */; + char filePath[filePathBufferSize]; + const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath, + fileName); + if (filePathLength >= filePathBufferSize) { + return 0; + } + return openBuffer(filePath, isUpdatable); + } + ~MmappedBuffer() { + if (mAlignedSize == 0) { + return; + } int ret = munmap(mMmappedBuffer, mAlignedSize); if (ret != 0) { AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno); @@ -89,6 +119,11 @@ class MmappedBuffer { : mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer), mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {} + // Empty file. We have to handle an empty file as a valid part of a dictionary. + AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable) + : mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0), + mIsUpdatable(isUpdatable) {} + DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer); uint8_t *const mBuffer;