Merge "Use byte array view in ver4 dict contents."
This commit is contained in:
commit
bfcd5efd50
8 changed files with 32 additions and 51 deletions
|
@ -17,7 +17,6 @@
|
||||||
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -28,11 +27,12 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class ReadWriteByteArrayView;
|
||||||
|
|
||||||
class BigramDictContent : public SparseTableDictContent {
|
class BigramDictContent : public SparseTableDictContent {
|
||||||
public:
|
public:
|
||||||
// BigramDictContent constructor, shown as paired diff rows (first row of each pair is the old
// line, second row is the new line).
// Old form: receives parallel arrays of raw buffer pointers and buffer sizes.
// New form: receives an array of ReadWriteByteArrayView, so no separate size array is passed.
// Both forms forward the buffers to SparseTableDictContent together with
// Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE and BIGRAM_ADDRESS_TABLE_DATA_SIZE, and
// store hasHistoricalInfo in mHasHistoricalInfo.
BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
|
BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
|
||||||
: SparseTableDictContent(buffers, bufferSizes,
|
: SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -27,11 +26,12 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class ReadWriteByteArrayView;
|
||||||
|
|
||||||
class ShortcutDictContent : public SparseTableDictContent {
|
class ShortcutDictContent : public SparseTableDictContent {
|
||||||
public:
|
public:
|
||||||
// ShortcutDictContent constructor, shown as paired diff rows (old line first, new line second).
// Old form: receives parallel arrays of raw buffer pointers and buffer sizes.
// New form: receives an array of ReadWriteByteArrayView only.
// Both forms delegate to SparseTableDictContent with
// Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE and SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
// the constructor body itself is empty.
ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes)
|
ShortcutDictContent(const ReadWriteByteArrayView *const buffers)
|
||||||
: SparseTableDictContent(buffers, bufferSizes,
|
: SparseTableDictContent(buffers, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
ShortcutDictContent()
|
ShortcutDictContent()
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
|
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
|
||||||
#define LATINIME_SINGLE_DICT_CONTENT_H
|
#define LATINIME_SINGLE_DICT_CONTENT_H
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -30,8 +29,8 @@ namespace latinime {
|
||||||
|
|
||||||
class SingleDictContent {
|
class SingleDictContent {
|
||||||
public:
|
public:
|
||||||
// SingleDictContent constructor, shown as paired diff rows (old line first, new line second).
// Old form: takes a raw pointer plus size and wraps them in a ReadWriteByteArrayView itself.
// New form: takes an already-built ReadWriteByteArrayView by value and passes it through.
// Either way mExpandableContentBuffer is initialized with that view and
// BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE.
SingleDictContent(uint8_t *const buffer, const int bufferSize)
|
SingleDictContent(const ReadWriteByteArrayView buffer)
|
||||||
: mExpandableContentBuffer(ReadWriteByteArrayView(buffer, bufferSize),
|
: mExpandableContentBuffer(buffer,
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
||||||
|
|
||||||
SingleDictContent()
|
SingleDictContent()
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||||
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -31,19 +30,13 @@ namespace latinime {
|
||||||
// TODO: Support multiple contents.
|
// TODO: Support multiple contents.
|
||||||
class SparseTableDictContent {
|
class SparseTableDictContent {
|
||||||
public:
|
public:
|
||||||
// SparseTableDictContent constructor, shown as paired diff rows (old line first, new line
// second).
// Old form: takes parallel arrays of raw pointers and sizes and builds a ReadWriteByteArrayView
// for each of the three buffers from buffers[i] / bufferSizes[i].
// New form: takes an array of ReadWriteByteArrayView and uses buffers[i] directly.
// In both forms the elements at LOOKUP_TABLE_BUFFER_INDEX, ADDRESS_TABLE_BUFFER_INDEX and
// CONTENT_BUFFER_INDEX initialize the lookup-table, address-table and content buffers, each with
// BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE. mAddressLookupTable is then
// built from pointers to the first two buffers plus sparseTableBlockSize and
// sparseTableDataSize.
// NOTE(review): the array length is not passed or checked here; callers presumably guarantee
// that all three indices are valid -- confirm against the call sites.
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
|
AK_FORCE_INLINE SparseTableDictContent(const ReadWriteByteArrayView *const buffers,
|
||||||
const int sparseTableBlockSize, const int sparseTableDataSize)
|
const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||||
: mExpandableLookupTableBuffer(
|
: mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
|
||||||
ReadWriteByteArrayView(buffers[LOOKUP_TABLE_BUFFER_INDEX],
|
|
||||||
bufferSizes[LOOKUP_TABLE_BUFFER_INDEX]),
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableAddressTableBuffer(
|
mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
|
||||||
ReadWriteByteArrayView(buffers[ADDRESS_TABLE_BUFFER_INDEX],
|
|
||||||
bufferSizes[ADDRESS_TABLE_BUFFER_INDEX]),
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableContentBuffer(
|
mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
|
||||||
ReadWriteByteArrayView(buffers[CONTENT_BUFFER_INDEX],
|
|
||||||
bufferSizes[CONTENT_BUFFER_INDEX]),
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
sparseTableBlockSize, sparseTableDataSize) {}
|
sparseTableBlockSize, sparseTableDataSize) {}
|
||||||
|
|
|
@ -17,13 +17,13 @@
|
||||||
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
#include "utils/byte_array_view.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -31,8 +31,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
public:
|
public:
|
||||||
typedef std::unordered_map<int, int> TerminalIdMap;
|
typedef std::unordered_map<int, int> TerminalIdMap;
|
||||||
|
|
||||||
// TerminalPositionLookupTable constructor, shown as paired diff rows (old line first, new line
// second).
// Old form: takes a raw pointer plus size and forwards both to SingleDictContent.
// New form: takes a ReadWriteByteArrayView by value and forwards it to SingleDictContent.
// In both forms mSize is the tail position of the underlying buffer divided by
// Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE.
TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize)
|
TerminalPositionLookupTable(const ReadWriteByteArrayView buffer)
|
||||||
: SingleDictContent(buffer, bufferSize),
|
: SingleDictContent(buffer),
|
||||||
mSize(getBuffer()->getTailPosition()
|
mSize(getBuffer()->getTailPosition()
|
||||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
||||||
|
|
||||||
|
|
|
@ -45,16 +45,13 @@ namespace latinime {
|
||||||
if (!bodyBuffer) {
|
if (!bodyBuffer) {
|
||||||
return Ver4DictBuffersPtr(nullptr);
|
return Ver4DictBuffersPtr(nullptr);
|
||||||
}
|
}
|
||||||
std::vector<uint8_t *> buffers;
|
std::vector<ReadWriteByteArrayView> buffers;
|
||||||
std::vector<int> bufferSizes;
|
|
||||||
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
|
const ReadWriteByteArrayView buffer = bodyBuffer->getReadWriteByteArrayView();
|
||||||
int position = 0;
|
int position = 0;
|
||||||
while (position < static_cast<int>(buffer.size())) {
|
while (position < static_cast<int>(buffer.size())) {
|
||||||
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
|
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(
|
||||||
buffer.data(), &position);
|
buffer.data(), &position);
|
||||||
const ReadWriteByteArrayView subBuffer = buffer.subView(position, bufferSize);
|
buffers.push_back(buffer.subView(position, bufferSize));
|
||||||
buffers.push_back(subBuffer.data());
|
|
||||||
bufferSizes.push_back(subBuffer.size());
|
|
||||||
position += bufferSize;
|
position += bufferSize;
|
||||||
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
|
if (bufferSize < 0 || position < 0 || position > static_cast<int>(buffer.size())) {
|
||||||
AKLOGE("The dict body file is corrupted.");
|
AKLOGE("The dict body file is corrupted.");
|
||||||
|
@ -66,7 +63,7 @@ namespace latinime {
|
||||||
return Ver4DictBuffersPtr(nullptr);
|
return Ver4DictBuffersPtr(nullptr);
|
||||||
}
|
}
|
||||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
|
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
|
||||||
formatVersion, buffers, bufferSizes));
|
formatVersion, buffers));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||||
|
@ -178,29 +175,20 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
|
||||||
// Ver4DictBuffers constructor definition, shown as paired diff rows (old line first, new line
// second).
// Takes the header and body mmapped buffers by rvalue reference and moves them into
// mHeaderBuffer and mDictBuffer.
// Old form: content buffers arrive as two parallel vectors (raw pointers and sizes), and a
// ReadWriteByteArrayView is built at each use from contentBuffers[i] / contentBufferSizes[i].
// New form: contentBuffers already holds ReadWriteByteArrayView elements that are used directly.
// Elements are selected with the Ver4DictConstants::*_INDEX constants. The trie, terminal
// position lookup table and language model contents each receive a single element, while the
// bigram and shortcut contents receive a pointer to the element at their starting index.
// NOTE(review): contentBuffers.size() is not checked in this constructor; the caller presumably
// guarantees that every index used here is in range -- confirm.
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion,
|
const FormatUtils::FORMAT_VERSION formatVersion,
|
||||||
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
|
const std::vector<ReadWriteByteArrayView> &contentBuffers)
|
||||||
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
|
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
|
||||||
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
|
mHeaderPolicy(mHeaderBuffer->getReadOnlyByteArrayView().data(), formatVersion),
|
||||||
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
|
mExpandableHeaderBuffer(mHeaderBuffer->getReadWriteByteArrayView(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableTrieBuffer(
|
mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
||||||
ReadWriteByteArrayView(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
|
||||||
contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX]),
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mTerminalPositionLookupTable(
|
mTerminalPositionLookupTable(
|
||||||
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
|
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
||||||
contentBufferSizes[
|
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
|
||||||
mLanguageModelDictContent(
|
|
||||||
ReadWriteByteArrayView(
|
|
||||||
contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
|
||||||
contentBufferSizes[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX]),
|
|
||||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||||
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
||||||
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
|
||||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||||
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
|
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
||||||
&contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
|
||||||
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
||||||
|
|
|
@ -122,8 +122,7 @@ class Ver4DictBuffers {
|
||||||
// Declaration of the Ver4DictBuffers constructor that takes mmapped header/body buffers, shown
// as paired diff rows (old line first, new line second).
// Old form: content buffers are passed as a vector of raw pointers plus a vector of sizes.
// New form: a single vector of ReadWriteByteArrayView replaces both parameters.
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion,
|
const FormatUtils::FORMAT_VERSION formatVersion,
|
||||||
const std::vector<uint8_t *> &contentBuffers,
|
const std::vector<ReadWriteByteArrayView> &contentBuffers);
|
||||||
const std::vector<int> &contentBufferSizes);
|
|
||||||
|
|
||||||
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
||||||
|
|
||||||
|
|
|
@ -77,10 +77,12 @@ class ReadWriteByteArrayView {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(ReadWriteByteArrayView);
|
// The default copy constructor and assignment operator are kept so that this class can be
|
||||||
|
// stored in STL containers.
|
||||||
|
|
||||||
uint8_t *const mPtr;
|
// These members must be non-const so that the default assignment operator is available.
|
||||||
const size_t mSize;
|
uint8_t *mPtr;
|
||||||
|
size_t mSize;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
Loading…
Reference in a new issue