Merge "Use byte array view in ver4 dict contents."
This commit is contained in:
commit
bfcd5efd50
8 changed files with 32 additions and 51 deletions
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
||||
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -28,11 +27,12 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class ReadWriteByteArrayView;
|
||||
|
||||
class BigramDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
|
||||
: SparseTableDictContent(buffers, bufferSizes,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
|
||||
: SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -27,11 +26,12 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class ReadWriteByteArrayView;
|
||||
|
||||
class ShortcutDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes)
|
||||
: SparseTableDictContent(buffers, bufferSizes,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
ShortcutDictContent(const ReadWriteByteArrayView *const buffers)
|
||||
: SparseTableDictContent(buffers, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
|
||||
ShortcutDictContent()
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
|
||||
#define LATINIME_SINGLE_DICT_CONTENT_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -30,9 +29,9 @@ namespace latinime {
|
|||
|
||||
class SingleDictContent {
|
||||
public:
|
||||
SingleDictContent(uint8_t *const buffer, const int bufferSize)
|
||||
: mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
||||
SingleDictContent(const ReadWriteByteArrayView buffer)
|
||||
: mExpandableContentBuffer(buffer,
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
||||
|
||||
SingleDictContent()
|
||||
: mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE) {}
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include "defines.h"
|
||||
|
@ -31,19 +30,13 @@ namespace latinime {
|
|||
// TODO: Support multiple contents.
|
||||
class SparseTableDictContent {
|
||||
public:
|
||||
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
|
||||
AK_FORCE_INLINE SparseTableDictContent(const ReadWriteByteArrayView *const buffers,
|
||||
const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||
: mExpandableLookupTableBuffer(
|
||||
ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
|
||||
bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
|
||||
: mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mExpandableAddressTableBuffer(
|
||||
ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
|
||||
bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
|
||||
mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mExpandableContentBuffer(
|
||||
ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
|
||||
bufferSizes[CONTENT_BUFFER_INDEX]),
|
||||
mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||
sparseTableBlockSize, sparseTableDataSize) {}
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "utils/byte_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -31,8 +31,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
|||
public:
|
||||
typedef std::unordered_map<int, int> TerminalIdMap;
|
||||
|
||||
TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize)
|
||||
: SingleDictContent(buffer, bufferSize),
|
||||
TerminalPositionLookupTable(const ReadWriteByteArrayView buffer)
|
||||
: SingleDictContent(buffer),
|
||||
mSize(getBuffer()->getTailPosition()
|
||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
||||
|
||||
|
|
|
@ -45,16 +45,13 @@ namespace latinime {
|
|||
if (!bodyBuffer) {
|
||||
return Ver4DictBuffersPtr(nullptr);
|
||||
}
|
||||
std::vector<uint8_t *> buffers;
|
||||
std::vector<int> bufferSizes;
|
||||
std::vector<ReadWriteByteArrayView> buffers;
|
||||
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
|
||||
int position = 0;
|
||||
while (position < static_cast<int>(buffer.size())) {
|
||||
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
|
||||
buffer.data(), &position);
|
||||
const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
|
||||
buffers.push_back(subBuffer.data());
|
||||
bufferSizes.push_back(subBuffer.size());
|
||||
buffers.push_back(buffer.subView(position, bufferSize));
|
||||
position += bufferSize;
|
||||
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
|
||||
AKLOGE("The dict body file is corrupted.");
|
||||
|
@ -66,7 +63,7 @@ namespace latinime {
|
|||
return Ver4DictBuffersPtr(nullptr);
|
||||
}
|
||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
|
||||
formatVersion, buffers, bufferSizes));
|
||||
formatVersion, buffers));
|
||||
}
|
||||
|
||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
|
@ -178,29 +175,20 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
|
|||
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion,
|
||||
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
|
||||
const std::vector<ReadWriteByteArrayView> &contentBuffers)
|
||||
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
|
||||
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
|
||||
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mExpandableTrieBuffer(
|
||||
ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
||||
contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
|
||||
mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mTerminalPositionLookupTable(
|
||||
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
|
||||
contentBufferSizes[
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
||||
mLanguageModelDictContent(
|
||||
ReadWriteByteArrayView(
|
||||
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
||||
contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
|
||||
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
||||
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
||||
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
|
||||
&contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
||||
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
||||
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
||||
|
||||
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
||||
|
|
|
@ -122,8 +122,7 @@ class Ver4DictBuffers {
|
|||
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion,
|
||||
const std::vector<uint8_t *> &contentBuffers,
|
||||
const std::vector<int> &contentBufferSizes);
|
||||
const std::vector<ReadWriteByteArrayView> &contentBuffers);
|
||||
|
||||
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
||||
|
||||
|
|
|
@ -77,10 +77,12 @@ class ReadWriteByteArrayView {
|
|||
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
|
||||
// Default copy constructor and assignment operator are used for using this class with STL
|
||||
// containers.
|
||||
|
||||
uint8_t *const mPtr;
|
||||
const size_t mSize;
|
||||
// These members cannot be const to have the assignment operator.
|
||||
uint8_t *mPtr;
|
||||
size_t mSize;
|
||||
};
|
||||
|
||||
} // namespace latinime
|
||||
|
|
Loading…
Reference in a new issue