Use byte array view in ver4 dict contents.

Change-Id: Icf79a51a200f7ccd775264d1a83dd61e7dcfbab2
Keisuke Kuroyanagi 2014-08-18 22:46:10 +09:00
parent 4fbb2148ee
commit 1f6e52ef02
8 changed files with 32 additions and 51 deletions

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H #ifndef LATINIME_BIGRAM_DICT_CONTENT_H
#define LATINIME_BIGRAM_DICT_CONTENT_H #define LATINIME_BIGRAM_DICT_CONTENT_H
#include <cstdint>
#include <cstdio> #include <cstdio>
#include "defines.h" #include "defines.h"
@ -28,11 +27,12 @@
namespace latinime { namespace latinime {
class ReadWriteByteArrayView;
class BigramDictContent : public SparseTableDictContent { class BigramDictContent : public SparseTableDictContent {
public: public:
BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo) BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
: SparseTableDictContent(buffers, bufferSizes, : SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {} mHasHistoricalInfo(hasHistoricalInfo) {}

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H #ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
#define LATINIME_SHORTCUT_DICT_CONTENT_H #define LATINIME_SHORTCUT_DICT_CONTENT_H
#include <cstdint>
#include <cstdio> #include <cstdio>
#include "defines.h" #include "defines.h"
@ -27,11 +26,12 @@
namespace latinime { namespace latinime {
class ReadWriteByteArrayView;
class ShortcutDictContent : public SparseTableDictContent { class ShortcutDictContent : public SparseTableDictContent {
public: public:
ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes) ShortcutDictContent(const ReadWriteByteArrayView *const buffers)
: SparseTableDictContent(buffers, bufferSizes, : SparseTableDictContent(buffers, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
ShortcutDictContent() ShortcutDictContent()

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_SINGLE_DICT_CONTENT_H #ifndef LATINIME_SINGLE_DICT_CONTENT_H
#define LATINIME_SINGLE_DICT_CONTENT_H #define LATINIME_SINGLE_DICT_CONTENT_H
#include <cstdint>
#include <cstdio> #include <cstdio>
#include "defines.h" #include "defines.h"
@ -30,8 +29,8 @@ namespace latinime {
class SingleDictContent { class SingleDictContent {
public: public:
SingleDictContent(uint8_t *const buffer, const int bufferSize) SingleDictContent(const ReadWriteByteArrayView buffer)
: mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize), : mExpandableContentBuffer(buffer,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent() SingleDictContent()

View File

@ -17,7 +17,6 @@
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H #ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H #define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
#include <cstdint>
#include <cstdio> #include <cstdio>
#include "defines.h" #include "defines.h"
@ -31,19 +30,13 @@ namespace latinime {
// TODO: Support multiple contents. // TODO: Support multiple contents.
class SparseTableDictContent { class SparseTableDictContent {
public: public:
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes, AK_FORCE_INLINE SparseTableDictContent(const ReadWriteByteArrayView *const buffers,
const int sparseTableBlockSize, const int sparseTableDataSize) const int sparseTableBlockSize, const int sparseTableDataSize)
: mExpandableLookupTableBuffer( : mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableAddressTableBuffer( mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableContentBuffer( mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
bufferSizes[CONTENT_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {} sparseTableBlockSize, sparseTableDataSize) {}

View File

@ -17,13 +17,13 @@
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H #ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H #define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#include <cstdint>
#include <cstdio> #include <cstdio>
#include <unordered_map> #include <unordered_map>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "utils/byte_array_view.h"
namespace latinime { namespace latinime {
@ -31,8 +31,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
public: public:
typedef std::unordered_map<int, int> TerminalIdMap; typedef std::unordered_map<int, int> TerminalIdMap;
TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize) TerminalPositionLookupTable(const ReadWriteByteArrayView buffer)
: SingleDictContent(buffer, bufferSize), : SingleDictContent(buffer),
mSize(getBuffer()->getTailPosition() mSize(getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {} / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}

View File

@ -45,16 +45,13 @@ namespace latinime {
if (!bodyBuffer) { if (!bodyBuffer) {
return Ver4DictBuffersPtr(nullptr); return Ver4DictBuffersPtr(nullptr);
} }
std::vector<uint8_t *> buffers; std::vector<ReadWriteByteArrayView> buffers;
std::vector<int> bufferSizes;
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView(); const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
int position = 0; int position = 0;
while (position < static_cast<int>(buffer.size())) { while (position < static_cast<int>(buffer.size())) {
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition( const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
buffer.data(), &position); buffer.data(), &position);
const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize); buffers.push_back(buffer.subView(position, bufferSize));
buffers.push_back(subBuffer.data());
bufferSizes.push_back(subBuffer.size());
position += bufferSize; position += bufferSize;
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) { if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
AKLOGE("The dict body file is corrupted."); AKLOGE("The dict body file is corrupted.");
@ -66,7 +63,7 @@ namespace latinime {
return Ver4DictBuffersPtr(nullptr); return Ver4DictBuffersPtr(nullptr);
} }
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer), return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
formatVersion, buffers, bufferSizes)); formatVersion, buffers));
} }
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
@ -178,29 +175,20 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer, MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion, const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes) const std::vector<ReadWriteByteArrayView> &contentBuffers)
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)), : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion), mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(), mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableTrieBuffer( mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable( mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX], contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
contentBufferSizes[ mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
mLanguageModelDictContent(
ReadWriteByteArrayView(
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
mHeaderPolicy.hasHistoricalInfoOfWords()), mHeaderPolicy.hasHistoricalInfoOfWords()),
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX], mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()), mHeaderPolicy.hasHistoricalInfoOfWords()),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX], mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
&contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {} mIsUpdatable(mDictBuffer->isUpdatable()) {}
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize) Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)

View File

@ -122,8 +122,7 @@ class Ver4DictBuffers {
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer, Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer, MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion, const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers, const std::vector<ReadWriteByteArrayView> &contentBuffers);
const std::vector<int> &contentBufferSizes);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize); Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);

View File

@ -77,10 +77,12 @@ class ReadWriteByteArrayView {
} }
private: private:
DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView); // Default copy constructor and assignment operator are used for using this class with STL
// containers.
uint8_t *const mPtr; // These members cannot be const to have the assignment operator.
const size_t mSize; uint8_t *mPtr;
size_t mSize;
}; };
} // namespace latinime } // namespace latinime