Use byte array view in ver4 dict contents.

Change-Id: Icf79a51a200f7ccd775264d1a83dd61e7dcfbab2
This commit is contained in:
Keisuke Kuroyanagi 2014-08-18 22:46:10 +09:00
parent 4fbb2148ee
commit 1f6e52ef02
8 changed files with 32 additions and 51 deletions

View file

@ -17,7 +17,6 @@
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
#define LATINIME_BIGRAM_DICT_CONTENT_H
#include <cstdint>
#include <cstdio>
#include "defines.h"
@ -28,11 +27,12 @@
namespace latinime {
class ReadWriteByteArrayView;
class BigramDictContent : public SparseTableDictContent {
public:
BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
: SparseTableDictContent(buffers, bufferSizes,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
: SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}

View file

@ -17,7 +17,6 @@
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
#define LATINIME_SHORTCUT_DICT_CONTENT_H
#include <cstdint>
#include <cstdio>
#include "defines.h"
@ -27,11 +26,12 @@
namespace latinime {
class ReadWriteByteArrayView;
class ShortcutDictContent : public SparseTableDictContent {
public:
ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes)
: SparseTableDictContent(buffers, bufferSizes,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
ShortcutDictContent(const ReadWriteByteArrayView *const buffers)
: SparseTableDictContent(buffers, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
ShortcutDictContent()

View file

@ -17,7 +17,6 @@
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
#define LATINIME_SINGLE_DICT_CONTENT_H
#include <cstdint>
#include <cstdio>
#include "defines.h"
@ -30,9 +29,9 @@ namespace latinime {
class SingleDictContent {
public:
SingleDictContent(uint8_t *const buffer, const int bufferSize)
: mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent(const ReadWriteByteArrayView buffer)
: mExpandableContentBuffer(buffer,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
SingleDictContent()
: mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE) {}

View file

@ -17,7 +17,6 @@
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
#include <cstdint>
#include <cstdio>
#include "defines.h"
@ -31,19 +30,13 @@ namespace latinime {
// TODO: Support multiple contents.
class SparseTableDictContent {
public:
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
AK_FORCE_INLINE SparseTableDictContent(const ReadWriteByteArrayView *const buffers,
const int sparseTableBlockSize, const int sparseTableDataSize)
: mExpandableLookupTableBuffer(
ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
: mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableAddressTableBuffer(
ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableContentBuffer(
ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
bufferSizes[CONTENT_BUFFER_INDEX]),
mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {}

View file

@ -17,13 +17,13 @@
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "utils/byte_array_view.h"
namespace latinime {
@ -31,8 +31,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
public:
typedef std::unordered_map<int, int> TerminalIdMap;
TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize)
: SingleDictContent(buffer, bufferSize),
TerminalPositionLookupTable(const ReadWriteByteArrayView buffer)
: SingleDictContent(buffer),
mSize(getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}

View file

@ -45,16 +45,13 @@ namespace latinime {
if (!bodyBuffer) {
return Ver4DictBuffersPtr(nullptr);
}
std::vector<uint8_t *> buffers;
std::vector<int> bufferSizes;
std::vector<ReadWriteByteArrayView> buffers;
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
int position = 0;
while (position < static_cast<int>(buffer.size())) {
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
buffer.data(), &position);
const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
buffers.push_back(subBuffer.data());
bufferSizes.push_back(subBuffer.size());
buffers.push_back(buffer.subView(position, bufferSize));
position += bufferSize;
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
AKLOGE("The dict body file is corrupted.");
@ -66,7 +63,7 @@ namespace latinime {
return Ver4DictBuffersPtr(nullptr);
}
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
formatVersion, buffers, bufferSizes));
formatVersion, buffers));
}
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
@ -178,29 +175,20 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
const std::vector<ReadWriteByteArrayView> &contentBuffers)
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableTrieBuffer(
ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
contentBufferSizes[
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
mLanguageModelDictContent(
ReadWriteByteArrayView(
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
mHeaderPolicy.hasHistoricalInfoOfWords()),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
&contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
mIsUpdatable(mDictBuffer->isUpdatable()) {}
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)

View file

@ -122,8 +122,7 @@ class Ver4DictBuffers {
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
const FormatUtils::FORMAT_VERSION formatVersion,
const std::vector<uint8_t *> &contentBuffers,
const std::vector<int> &contentBufferSizes);
const std::vector<ReadWriteByteArrayView> &contentBuffers);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);

View file

@ -77,10 +77,12 @@ class ReadWriteByteArrayView {
}
private:
DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
// The default copy constructor and assignment operator are intentionally kept so that this
// class can be stored in STL containers.
uint8_t *const mPtr;
const size_t mSize;
// These members must be non-const; otherwise the implicit assignment operator would be deleted.
uint8_t *mPtr;
size_t mSize;
};
} // namespace latinime