Merge "Add utility methods related to buffers for ver4 dict."
This commit is contained in:
commit
0913aaa56f
9 changed files with 113 additions and 15 deletions
|
@ -30,8 +30,8 @@ namespace latinime {
|
||||||
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
public:
|
public:
|
||||||
// Reads information from existing dictionary buffer.
|
// Reads information from existing dictionary buffer.
|
||||||
HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
|
HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
|
||||||
: mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
|
: mDictFormatVersion(formatVersion),
|
||||||
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
|
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
|
||||||
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
|
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
|
||||||
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
|
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -34,7 +35,7 @@ class DicNodeVector;
|
||||||
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
PatriciaTriePolicy(const MmappedBuffer *const buffer)
|
PatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
|
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_2),
|
||||||
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
||||||
mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
|
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -33,9 +34,10 @@ class DicNodeVector;
|
||||||
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
|
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_3),
|
||||||
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mBuffer->getBufferSize() - mHeaderPolicy.getSize(),
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
|
|
|
@ -149,7 +149,8 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
||||||
mBuffer->getUsedAdditionalBufferSize();
|
mBuffer->getUsedAdditionalBufferSize();
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||||
|
@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||||
const char *const fileName, const HeaderPolicy *const headerPolicy) {
|
const char *const fileName, const HeaderPolicy *const headerPolicy) {
|
||||||
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
|
BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
|
||||||
MAX_DICTIONARY_SIZE);
|
|
||||||
int unigramCount = 0;
|
int unigramCount = 0;
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
if (mNeedsToDecay) {
|
if (mNeedsToDecay) {
|
||||||
|
@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||||
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||||
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
|
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
|
||||||
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
||||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
||||||
|
|
|
@ -32,12 +32,20 @@ namespace latinime {
|
||||||
// raw pointer but provides several methods that handle boundary checking for writing data.
|
// raw pointer but provides several methods that handle boundary checking for writing data.
|
||||||
class BufferWithExtendableBuffer {
|
class BufferWithExtendableBuffer {
|
||||||
public:
|
public:
|
||||||
|
static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
|
||||||
|
|
||||||
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
|
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
|
||||||
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
|
const int maxAdditionalBufferSize)
|
||||||
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
||||||
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||||
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||||
|
|
||||||
|
// Without original buffer.
|
||||||
|
BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
|
||||||
|
: mOriginalBuffer(0), mOriginalBufferSize(0),
|
||||||
|
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||||
|
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getTailPosition() const {
|
AK_FORCE_INLINE int getTailPosition() const {
|
||||||
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
||||||
}
|
}
|
||||||
|
@ -86,7 +94,6 @@ class BufferWithExtendableBuffer {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
||||||
|
|
||||||
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
|
||||||
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
|
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
|
||||||
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||||
|
|
||||||
|
|
|
@ -44,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
||||||
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||||
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
||||||
0 /* extendedRegionSize */);
|
0 /* extendedRegionSize */);
|
||||||
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
BufferWithExtendableBuffer bodyBuffer(
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_FILE_UTILS_H
|
||||||
|
#define LATINIME_FILE_UTILS_H
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class FileUtils {
|
||||||
|
public:
|
||||||
|
// Returns the size in bytes of the file at filePath.
// Returns -1 on error: when the file cannot be opened for reading, when fstat() fails, or when
// the size cannot be represented as a non-negative int.
static int getFileSize(const char *const filePath) {
    const int fd = open(filePath, O_RDONLY);
    if (fd == -1) {
        return -1;
    }
    struct stat statBuf;
    const int statResult = fstat(fd, &statBuf);
    // The descriptor is only needed for fstat(); release it before looking at the result.
    close(fd);
    if (statResult != 0) {
        return -1;
    }
    const int fileSize = static_cast<int>(statBuf.st_size);
    // st_size is an off_t and can be wider than int. Reject sizes that do not survive the
    // narrowing conversion instead of handing a truncated or negative size to callers.
    if (fileSize < 0 || static_cast<off_t>(fileSize) != statBuf.st_size) {
        return -1;
    }
    return fileSize;
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_FILE_UTILS_H */
|
|
@ -18,12 +18,15 @@
|
||||||
#define LATINIME_MMAPPED_BUFFER_H
|
#define LATINIME_MMAPPED_BUFFER_H
|
||||||
|
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
|
#include <climits>
|
||||||
|
#include <cstdio>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -59,7 +62,34 @@ class MmappedBuffer {
|
||||||
isUpdatable);
|
isUpdatable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mmap entire file.
|
||||||
|
static MmappedBuffer *openBuffer(const char *const path, const bool isUpdatable) {
|
||||||
|
const int fileSize = FileUtils::getFileSize(path);
|
||||||
|
if (fileSize == -1) {
|
||||||
|
return 0;
|
||||||
|
} else if (fileSize == 0) {
|
||||||
|
return new MmappedBuffer(isUpdatable);
|
||||||
|
} else {
|
||||||
|
return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maps the whole file whose path is the concatenation of dirPath and fileName. No separator is
// inserted between the two parts.
// Returns 0 when either part is null, when the path cannot be formatted, or when it does not
// fit in PATH_MAX characters; otherwise returns the result of openBuffer(filePath, isUpdatable).
static MmappedBuffer *openBuffer(const char *const dirPath, const char *const fileName,
        const bool isUpdatable) {
    // Passing a null pointer for a "%s" conversion is undefined behavior.
    if (!dirPath || !fileName) {
        return 0;
    }
    const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
    char filePath[filePathBufferSize];
    const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
            fileName);
    // snprintf() reports an output error with a negative value and truncation with a value
    // that is >= the buffer size. In both cases filePath is not usable.
    if (filePathLength < 0 || filePathLength >= filePathBufferSize) {
        return 0;
    }
    return openBuffer(filePath, isUpdatable);
}
|
||||||
|
|
||||||
~MmappedBuffer() {
|
~MmappedBuffer() {
|
||||||
|
if (mAlignedSize == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
int ret = munmap(mMmappedBuffer, mAlignedSize);
|
int ret = munmap(mMmappedBuffer, mAlignedSize);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
|
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
|
||||||
|
@ -89,6 +119,11 @@ class MmappedBuffer {
|
||||||
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
|
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
|
||||||
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
|
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
|
// Empty file. We have to handle an empty file as a valid part of a dictionary.
|
||||||
|
AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
|
||||||
|
: mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
|
||||||
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
|
||||||
|
|
||||||
uint8_t *const mBuffer;
|
uint8_t *const mBuffer;
|
||||||
|
|
Loading…
Reference in a new issue