Merge "Add utility methods related to buffers for ver4 dict."
This commit is contained in:
commit
0913aaa56f
9 changed files with 113 additions and 15 deletions
|
@ -30,8 +30,8 @@ namespace latinime {
|
|||
class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||
public:
|
||||
// Reads information from existing dictionary buffer.
|
||||
HeaderPolicy(const uint8_t *const dictBuf, const int dictSize)
|
||||
: mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)),
|
||||
HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion)
|
||||
: mDictFormatVersion(formatVersion),
|
||||
mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
|
||||
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
|
||||
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -34,7 +35,7 @@ class DicNodeVector;
|
|||
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
public:
|
||||
PatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
|
||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_2),
|
||||
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
||||
mDictBufferSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -33,9 +34,10 @@ class DicNodeVector;
|
|||
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
public:
|
||||
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()),
|
||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), FormatUtils::VERSION_3),
|
||||
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize(),
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||
mHeaderPolicy.isDecayingDict()),
|
||||
|
|
|
@ -149,7 +149,8 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
|
|||
|
||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
||||
mBuffer->getUsedAdditionalBufferSize();
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||
|
@ -161,8 +162,7 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam
|
|||
|
||||
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||
const char *const fileName, const HeaderPolicy *const headerPolicy) {
|
||||
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
|
||||
MAX_DICTIONARY_SIZE);
|
||||
BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
|
||||
int unigramCount = 0;
|
||||
int bigramCount = 0;
|
||||
if (mNeedsToDecay) {
|
||||
|
@ -171,7 +171,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
|||
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
||||
return;
|
||||
}
|
||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
||||
return;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||
const size_t BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||
const int BufferWithExtendableBuffer::NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE = 90;
|
||||
// TODO: Needs to allocate larger memory corresponding to the current vector size.
|
||||
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 128 * 1024;
|
||||
|
|
|
@ -32,12 +32,20 @@ namespace latinime {
|
|||
// raw pointer but provides several methods that handle boundary checking for writing data.
|
||||
class BufferWithExtendableBuffer {
|
||||
public:
|
||||
static const size_t DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE;
|
||||
|
||||
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize,
|
||||
const int maxAdditionalBufferSize = MAX_ADDITIONAL_BUFFER_SIZE)
|
||||
const int maxAdditionalBufferSize)
|
||||
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
||||
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||
|
||||
// Without original buffer.
|
||||
BufferWithExtendableBuffer(const int maxAdditionalBufferSize)
|
||||
: mOriginalBuffer(0), mOriginalBufferSize(0),
|
||||
mAdditionalBuffer(EXTEND_ADDITIONAL_BUFFER_SIZE_STEP), mUsedAdditionalBufferSize(0),
|
||||
mMaxAdditionalBufferSize(maxAdditionalBufferSize) {}
|
||||
|
||||
AK_FORCE_INLINE int getTailPosition() const {
|
||||
return mOriginalBufferSize + mUsedAdditionalBufferSize;
|
||||
}
|
||||
|
@ -86,7 +94,6 @@ class BufferWithExtendableBuffer {
|
|||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
||||
|
||||
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
||||
static const int NEAR_BUFFER_LIMIT_THRESHOLD_PERCENTILE;
|
||||
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||
|
||||
|
|
|
@ -44,12 +44,14 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
|
||||
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
||||
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
||||
0 /* extendedRegionSize */);
|
||||
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
|
||||
BufferWithExtendableBuffer bodyBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_FILE_UTILS_H
|
||||
#define LATINIME_FILE_UTILS_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class FileUtils {
|
||||
public:
|
||||
// Returns the size in bytes of the file at filePath.
// Returns -1 on error: when the file cannot be opened for reading, when fstat() fails, or when
// the size cannot be represented as a non-negative int (such sizes used to be silently
// truncated by the cast).
static int getFileSize(const char *const filePath) {
    const int fd = open(filePath, O_RDONLY);
    if (fd == -1) {
        return -1;
    }
    struct stat statBuf;
    const int statResult = fstat(fd, &statBuf);
    // The descriptor is only needed for fstat(); release it on every path.
    close(fd);
    if (statResult != 0) {
        return -1;
    }
    const int fileSize = static_cast<int>(statBuf.st_size);
    // Round-trip check: reject sizes that do not survive the narrowing to int.
    if (fileSize < 0 || static_cast<off_t>(fileSize) != statBuf.st_size) {
        return -1;
    }
    return fileSize;
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_FILE_UTILS_H */
|
|
@ -18,18 +18,21 @@
|
|||
#define LATINIME_MMAPPED_BUFFER_H
|
||||
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <cstdio>
|
||||
#include <fcntl.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class MmappedBuffer {
|
||||
public:
|
||||
static MmappedBuffer* openBuffer(const char *const path, const int bufferOffset,
|
||||
static MmappedBuffer *openBuffer(const char *const path, const int bufferOffset,
|
||||
const int bufferSize, const bool isUpdatable) {
|
||||
const int openMode = isUpdatable ? O_RDWR : O_RDONLY;
|
||||
const int mmapFd = open(path, openMode);
|
||||
|
@ -59,7 +62,34 @@ class MmappedBuffer {
|
|||
isUpdatable);
|
||||
}
|
||||
|
||||
// Mmap entire file.
|
||||
static MmappedBuffer *openBuffer(const char *const path, const bool isUpdatable) {
|
||||
const int fileSize = FileUtils::getFileSize(path);
|
||||
if (fileSize == -1) {
|
||||
return 0;
|
||||
} else if (fileSize == 0) {
|
||||
return new MmappedBuffer(isUpdatable);
|
||||
} else {
|
||||
return openBuffer(path, 0 /* bufferOffset */, fileSize, isUpdatable);
|
||||
}
|
||||
}
|
||||
|
||||
// Maps the entire file whose path is dirPath immediately followed by fileName (no separator
// is inserted between the two). Returns 0 when the path cannot be formatted or is longer than
// PATH_MAX characters; otherwise returns the result of openBuffer(filePath, isUpdatable).
static MmappedBuffer *openBuffer(const char *const dirPath, const char *const fileName,
        const bool isUpdatable) {
    const int filePathBufferSize = PATH_MAX + 1 /* terminator */;
    char filePath[filePathBufferSize];
    const int filePathLength = snprintf(filePath, filePathBufferSize, "%s%s", dirPath,
            fileName);
    // snprintf() returns a negative value on an output error and a value >= the buffer size
    // when the result was truncated; filePath must not be used in either case.
    if (filePathLength < 0 || filePathLength >= filePathBufferSize) {
        return 0;
    }
    return openBuffer(filePath, isUpdatable);
}
|
||||
|
||||
~MmappedBuffer() {
|
||||
if (mAlignedSize == 0) {
|
||||
return;
|
||||
}
|
||||
int ret = munmap(mMmappedBuffer, mAlignedSize);
|
||||
if (ret != 0) {
|
||||
AKLOGE("DICT: Failure in munmap. ret=%d errno=%d", ret, errno);
|
||||
|
@ -89,6 +119,11 @@ class MmappedBuffer {
|
|||
: mBuffer(buffer), mBufferSize(bufferSize), mMmappedBuffer(mmappedBuffer),
|
||||
mAlignedSize(alignedSize), mMmapFd(mmapFd), mIsUpdatable(isUpdatable) {}
|
||||
|
||||
// Empty file. We have to handle an empty file as a valid part of a dictionary.
|
||||
AK_FORCE_INLINE MmappedBuffer(const bool isUpdatable)
|
||||
: mBuffer(0), mBufferSize(0), mMmappedBuffer(0), mAlignedSize(0), mMmapFd(0),
|
||||
mIsUpdatable(isUpdatable) {}
|
||||
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(MmappedBuffer);
|
||||
|
||||
uint8_t *const mBuffer;
|
||||
|
|
Loading…
Reference in a new issue