Merge "Concatenate dict buffers other than header to a single file."
This commit is contained in:
commit
9e38fbaa2f
17 changed files with 234 additions and 167 deletions
|
@ -17,6 +17,9 @@
|
||||||
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
|
@ -27,12 +30,9 @@ namespace latinime {
|
||||||
|
|
||||||
class BigramDictContent : public SparseTableDictContent {
|
class BigramDictContent : public SparseTableDictContent {
|
||||||
public:
|
public:
|
||||||
BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
|
BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo,
|
||||||
const bool isUpdatable)
|
const bool isUpdatable)
|
||||||
: SparseTableDictContent(dictPath,
|
: SparseTableDictContent(buffers, bufferSizes, isUpdatable,
|
||||||
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||||
|
@ -87,10 +87,8 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flushToFile(const char *const dictPath) const {
|
bool flushToFile(FILE *const file) const {
|
||||||
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
return flush(file);
|
||||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
|
|
|
@ -24,7 +24,6 @@ namespace latinime {
|
||||||
class DictContent {
|
class DictContent {
|
||||||
public:
|
public:
|
||||||
virtual ~DictContent() {}
|
virtual ~DictContent() {}
|
||||||
virtual bool isValid() const = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictContent() {}
|
DictContent() {}
|
||||||
|
|
|
@ -71,7 +71,7 @@ bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
|
||||||
return writeEntry(probabilityEntry, entryPos);
|
return writeEntry(probabilityEntry, entryPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
|
bool ProbabilityDictContent::flushToFile(FILE *const file) const {
|
||||||
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||||
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
|
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
|
||||||
for (int i = 0; i < mSize; ++i) {
|
for (int i = 0; i < mSize; ++i) {
|
||||||
|
@ -81,10 +81,9 @@ bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return probabilityDictContentToWrite.flush(dictPath,
|
return probabilityDictContentToWrite.flush(file);
|
||||||
Ver4DictConstants::FREQ_FILE_EXTENSION);
|
|
||||||
} else {
|
} else {
|
||||||
return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
|
return flush(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,9 @@
|
||||||
#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
|
#ifndef LATINIME_PROBABILITY_DICT_CONTENT_H
|
||||||
#define LATINIME_PROBABILITY_DICT_CONTENT_H
|
#define LATINIME_PROBABILITY_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
@ -29,9 +32,9 @@ class ProbabilityEntry;
|
||||||
|
|
||||||
class ProbabilityDictContent : public SingleDictContent {
|
class ProbabilityDictContent : public SingleDictContent {
|
||||||
public:
|
public:
|
||||||
ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
|
ProbabilityDictContent(uint8_t *const buffer, const int bufferSize,
|
||||||
const bool isUpdatable)
|
const bool hasHistoricalInfo, const bool isUpdatable)
|
||||||
: SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
|
: SingleDictContent(buffer, bufferSize, isUpdatable),
|
||||||
mHasHistoricalInfo(hasHistoricalInfo),
|
mHasHistoricalInfo(hasHistoricalInfo),
|
||||||
mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
|
mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
|
||||||
|
|
||||||
|
@ -42,7 +45,7 @@ class ProbabilityDictContent : public SingleDictContent {
|
||||||
|
|
||||||
bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
|
bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
|
||||||
|
|
||||||
bool flushToFile(const char *const dictPath) const;
|
bool flushToFile(FILE *const file) const;
|
||||||
|
|
||||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
const ProbabilityDictContent *const originalProbabilityDictContent);
|
const ProbabilityDictContent *const originalProbabilityDictContent);
|
||||||
|
|
|
@ -59,12 +59,6 @@ int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
|
||||||
return addressLookupTable->get(terminalId);
|
return addressLookupTable->get(terminalId);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
|
|
||||||
return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ShortcutDictContent::runGC(
|
bool ShortcutDictContent::runGC(
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
const ShortcutDictContent *const originalShortcutDictContent) {
|
const ShortcutDictContent *const originalShortcutDictContent) {
|
||||||
|
|
|
@ -17,6 +17,9 @@
|
||||||
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
#ifndef LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
#define LATINIME_SHORTCUT_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
@ -26,11 +29,8 @@ namespace latinime {
|
||||||
|
|
||||||
class ShortcutDictContent : public SparseTableDictContent {
|
class ShortcutDictContent : public SparseTableDictContent {
|
||||||
public:
|
public:
|
||||||
ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
|
ShortcutDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool isUpdatable)
|
||||||
: SparseTableDictContent(dictPath,
|
: SparseTableDictContent(buffers, bufferSizes, isUpdatable,
|
||||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
|
||||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
@ -53,7 +53,9 @@ class ShortcutDictContent : public SparseTableDictContent {
|
||||||
// Returns head position of shortcut list for a PtNode specified by terminalId.
|
// Returns head position of shortcut list for a PtNode specified by terminalId.
|
||||||
int getShortcutListHeadPos(const int terminalId) const;
|
int getShortcutListHeadPos(const int terminalId) const;
|
||||||
|
|
||||||
bool flushToFile(const char *const dictPath) const;
|
bool flushToFile(FILE *const file) const {
|
||||||
|
return flush(file);
|
||||||
|
}
|
||||||
|
|
||||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
const ShortcutDictContent *const originalShortcutDictContent);
|
const ShortcutDictContent *const originalShortcutDictContent);
|
||||||
|
|
|
@ -17,35 +17,28 @@
|
||||||
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
|
#ifndef LATINIME_SINGLE_DICT_CONTENT_H
|
||||||
#define LATINIME_SINGLE_DICT_CONTENT_H
|
#define LATINIME_SINGLE_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class SingleDictContent : public DictContent {
|
class SingleDictContent : public DictContent {
|
||||||
public:
|
public:
|
||||||
SingleDictContent(const char *const dictPath, const char *const contentFileName,
|
SingleDictContent(uint8_t *const buffer, const int bufferSize, const bool isUpdatable)
|
||||||
const bool isUpdatable)
|
: mExpandableContentBuffer(buffer, bufferSize,
|
||||||
: mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
||||||
mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
|
|
||||||
mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
|
||||||
mIsValid(mMmappedBuffer) {}
|
|
||||||
|
|
||||||
SingleDictContent()
|
SingleDictContent()
|
||||||
: mMmappedBuffer(nullptr),
|
: mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE) {}
|
||||||
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
|
|
||||||
|
|
||||||
virtual ~SingleDictContent() {}
|
virtual ~SingleDictContent() {}
|
||||||
|
|
||||||
virtual bool isValid() const {
|
|
||||||
return mIsValid;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isNearSizeLimit() const {
|
bool isNearSizeLimit() const {
|
||||||
return mExpandableContentBuffer.isNearSizeLimit();
|
return mExpandableContentBuffer.isNearSizeLimit();
|
||||||
}
|
}
|
||||||
|
@ -59,17 +52,14 @@ class SingleDictContent : public DictContent {
|
||||||
return &mExpandableContentBuffer;
|
return &mExpandableContentBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
|
bool flush(FILE *const file) const {
|
||||||
return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
|
return DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableContentBuffer);
|
||||||
contentFileNameSuffix, &mExpandableContentBuffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
|
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
|
||||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||||
const bool mIsValid;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
|
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
|
||||||
|
|
|
@ -16,21 +16,22 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
bool SparseTableDictContent::flush(const char *const dictPath,
|
const int SparseTableDictContent::LOOKUP_TABLE_BUFFER_INDEX = 0;
|
||||||
const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
|
const int SparseTableDictContent::ADDRESS_TABLE_BUFFER_INDEX = 1;
|
||||||
const char *const contentFileNameSuffix) const {
|
const int SparseTableDictContent::CONTENT_BUFFER_INDEX = 2;
|
||||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix,
|
|
||||||
&mExpandableLookupTableBuffer)){
|
bool SparseTableDictContent::flush(FILE *const file) const {
|
||||||
|
if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableLookupTableBuffer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix,
|
if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableAddressTableBuffer)) {
|
||||||
&mExpandableAddressTableBuffer)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix,
|
if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableContentBuffer)) {
|
||||||
&mExpandableContentBuffer)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -17,12 +17,13 @@
|
||||||
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
#ifndef LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||||
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
#define LATINIME_SPARSE_TABLE_DICT_CONTENT_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -30,45 +31,29 @@ namespace latinime {
|
||||||
// TODO: Support multiple contents.
|
// TODO: Support multiple contents.
|
||||||
class SparseTableDictContent : public DictContent {
|
class SparseTableDictContent : public DictContent {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
|
AK_FORCE_INLINE SparseTableDictContent(uint8_t *const *buffers, const int *bufferSizes,
|
||||||
const char *const lookupTableFileName, const char *const addressTableFileName,
|
const bool isUpdatable, const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||||
const char *const contentFileName, const bool isUpdatable,
|
: mExpandableLookupTableBuffer(buffers[LOOKUP_TABLE_BUFFER_INDEX],
|
||||||
const int sparseTableBlockSize, const int sparseTableDataSize)
|
bufferSizes[LOOKUP_TABLE_BUFFER_INDEX],
|
||||||
: mLookupTableBuffer(
|
|
||||||
MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
|
|
||||||
mAddressTableBuffer(
|
|
||||||
MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
|
|
||||||
mContentBuffer(
|
|
||||||
MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
|
|
||||||
mExpandableLookupTableBuffer(
|
|
||||||
mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
|
|
||||||
mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableAddressTableBuffer(
|
mExpandableAddressTableBuffer(buffers[ADDRESS_TABLE_BUFFER_INDEX],
|
||||||
mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
|
bufferSizes[ADDRESS_TABLE_BUFFER_INDEX],
|
||||||
mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
|
mExpandableContentBuffer(buffers[CONTENT_BUFFER_INDEX],
|
||||||
mContentBuffer ? mContentBuffer->getBufferSize() : 0,
|
bufferSizes[CONTENT_BUFFER_INDEX],
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
sparseTableBlockSize, sparseTableDataSize),
|
sparseTableBlockSize, sparseTableDataSize) {}
|
||||||
mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
|
|
||||||
|
|
||||||
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
|
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||||
: mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
|
: mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
|
||||||
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
|
sparseTableBlockSize, sparseTableDataSize) {}
|
||||||
|
|
||||||
virtual ~SparseTableDictContent() {}
|
virtual ~SparseTableDictContent() {}
|
||||||
|
|
||||||
virtual bool isValid() const {
|
|
||||||
return mIsValid;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isNearSizeLimit() const {
|
bool isNearSizeLimit() const {
|
||||||
return mExpandableLookupTableBuffer.isNearSizeLimit()
|
return mExpandableLookupTableBuffer.isNearSizeLimit()
|
||||||
|| mExpandableAddressTableBuffer.isNearSizeLimit()
|
|| mExpandableAddressTableBuffer.isNearSizeLimit()
|
||||||
|
@ -92,20 +77,19 @@ class SparseTableDictContent : public DictContent {
|
||||||
return &mExpandableContentBuffer;
|
return &mExpandableContentBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
|
bool flush(FILE *const file) const;
|
||||||
const char *const addressTableFileName, const char *const contentFileName) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
|
static const int LOOKUP_TABLE_BUFFER_INDEX;
|
||||||
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
|
static const int ADDRESS_TABLE_BUFFER_INDEX;
|
||||||
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
|
static const int CONTENT_BUFFER_INDEX;
|
||||||
|
|
||||||
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
|
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||||
SparseTable mAddressLookupTable;
|
SparseTable mAddressLookupTable;
|
||||||
const bool mIsValid;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
||||||
|
|
|
@ -50,7 +50,7 @@ bool TerminalPositionLookupTable::setTerminalPtNodePosition(
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
|
bool TerminalPositionLookupTable::flushToFile(FILE *const file) const {
|
||||||
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
|
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
|
||||||
// table and write the new table to the file.
|
// table and write the new table to the file.
|
||||||
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||||
|
@ -63,12 +63,11 @@ bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return lookupTableToWrite.flush(dictPath,
|
return lookupTableToWrite.flush(file);
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
|
||||||
} else {
|
} else {
|
||||||
// We can simply use this lookup table because the buffer size has not been
|
// We can simply use this lookup table because the buffer size has not been
|
||||||
// changed.
|
// changed.
|
||||||
return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
return flush(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#ifndef LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
#define LATINIME_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstdio>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -29,9 +31,9 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
public:
|
public:
|
||||||
typedef std::unordered_map<int, int> TerminalIdMap;
|
typedef std::unordered_map<int, int> TerminalIdMap;
|
||||||
|
|
||||||
TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
|
TerminalPositionLookupTable(uint8_t *const buffer, const int bufferSize,
|
||||||
: SingleDictContent(dictPath,
|
const bool isUpdatable)
|
||||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
: SingleDictContent(buffer, bufferSize, isUpdatable),
|
||||||
mSize(getBuffer()->getTailPosition()
|
mSize(getBuffer()->getTailPosition()
|
||||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
||||||
|
|
||||||
|
@ -45,7 +47,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
return mSize;
|
return mSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool flushToFile(const char *const dictPath) const;
|
bool flushToFile(FILE *const file) const;
|
||||||
|
|
||||||
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
|
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
|
||||||
|
|
||||||
|
|
|
@ -18,16 +18,19 @@
|
||||||
|
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <fcntl.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
|
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
|
||||||
const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
|
const char *const dictPath, MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion) {
|
const FormatUtils::FORMAT_VERSION formatVersion) {
|
||||||
if (!headerBuffer) {
|
if (!headerBuffer) {
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
|
@ -36,8 +39,32 @@ namespace latinime {
|
||||||
}
|
}
|
||||||
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
|
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
|
||||||
const bool isUpdatable = headerBuffer->isUpdatable();
|
const bool isUpdatable = headerBuffer->isUpdatable();
|
||||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
|
|
||||||
formatVersion));
|
MmappedBuffer::MmappedBufferPtr bodyBuffer = MmappedBuffer::openBuffer(dictPath,
|
||||||
|
Ver4DictConstants::BODY_FILE_EXTENSION, isUpdatable);
|
||||||
|
if (!bodyBuffer) {
|
||||||
|
return Ver4DictBuffersPtr(nullptr);
|
||||||
|
}
|
||||||
|
std::vector<uint8_t *> buffers;
|
||||||
|
std::vector<int> bufferSizes;
|
||||||
|
uint8_t *const buffer = bodyBuffer->getBuffer();
|
||||||
|
int position = 0;
|
||||||
|
while (position < bodyBuffer->getBufferSize()) {
|
||||||
|
const int bufferSize = ByteArrayUtils::readUint32AndAdvancePosition(buffer, &position);
|
||||||
|
buffers.push_back(buffer + position);
|
||||||
|
bufferSizes.push_back(bufferSize);
|
||||||
|
position += bufferSize;
|
||||||
|
if (bufferSize < 0 || position < 0 || position > bodyBuffer->getBufferSize()) {
|
||||||
|
AKLOGE("The dict body file is corrupted.");
|
||||||
|
return Ver4DictBuffersPtr(nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (buffers.size() != Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE) {
|
||||||
|
AKLOGE("The dict body file is corrupted.");
|
||||||
|
return Ver4DictBuffersPtr(nullptr);
|
||||||
|
}
|
||||||
|
return Ver4DictBuffersPtr(new Ver4DictBuffers(std::move(headerBuffer), std::move(bodyBuffer),
|
||||||
|
isUpdatable, formatVersion, buffers, bufferSizes));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||||
|
@ -76,30 +103,32 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||||
Ver4DictConstants::HEADER_FILE_EXTENSION);
|
Ver4DictConstants::HEADER_FILE_EXTENSION);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write trie file.
|
|
||||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
|
// Write body file.
|
||||||
Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
|
const int bodyFilePathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictPath,
|
||||||
AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath,
|
Ver4DictConstants::BODY_FILE_EXTENSION);
|
||||||
Ver4DictConstants::TRIE_FILE_EXTENSION);
|
char bodyFilePath[bodyFilePathBufSize];
|
||||||
|
FileUtils::getFilePathWithSuffix(dictPath, Ver4DictConstants::BODY_FILE_EXTENSION,
|
||||||
|
bodyFilePathBufSize, bodyFilePath);
|
||||||
|
|
||||||
|
const int fd = open(bodyFilePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
|
||||||
|
if (fd == -1) {
|
||||||
|
AKLOGE("File %s cannot be opened. errno: %d", bodyFilePath, errno);
|
||||||
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write dictionary contents.
|
FILE *const file = fdopen(fd, "wb");
|
||||||
if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
|
if (!file) {
|
||||||
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
AKLOGE("fdopen failed for the file %s. errno: %d", filePath, errno);
|
||||||
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!mProbabilityDictContent.flushToFile(dictPath)) {
|
|
||||||
AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
|
if (!flushDictBuffers(file)) {
|
||||||
return false;
|
fclose(file);
|
||||||
}
|
|
||||||
if (!mBigramDictContent.flushToFile(dictPath)) {
|
|
||||||
AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!mShortcutDictContent.flushToFile(dictPath)) {
|
|
||||||
AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
fclose(file);
|
||||||
// Remove existing dictionary.
|
// Remove existing dictionary.
|
||||||
if (!FileUtils::removeDirAndFiles(dictDirPath)) {
|
if (!FileUtils::removeDirAndFiles(dictDirPath)) {
|
||||||
AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
|
AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
|
||||||
|
@ -115,23 +144,60 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
|
bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
|
||||||
MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
|
// Write trie.
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion)
|
if (!DictFileWritingUtils::writeBufferToFileTail(file, &mExpandableTrieBuffer)) {
|
||||||
: mHeaderBuffer(std::move(headerBuffer)),
|
AKLOGE("Trie cannot be written. %s", tmpDirPath);
|
||||||
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
|
return false;
|
||||||
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
|
}
|
||||||
|
// Write terminal position lookup table.
|
||||||
|
if (!mTerminalPositionLookupTable.flushToFile(file)) {
|
||||||
|
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write probability dict content.
|
||||||
|
if (!mProbabilityDictContent.flushToFile(file)) {
|
||||||
|
AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write bigram dict content.
|
||||||
|
if (!mBigramDictContent.flushToFile(file)) {
|
||||||
|
AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write shortcut dict content.
|
||||||
|
if (!mShortcutDictContent.flushToFile(file)) {
|
||||||
|
AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
|
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||||
|
const bool isUpdatable, const FormatUtils::FORMAT_VERSION formatVersion,
|
||||||
|
const std::vector<uint8_t *> &contentBuffers, const std::vector<int> &contentBufferSizes)
|
||||||
|
: mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(std::move(bodyBuffer)),
|
||||||
mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
|
mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
|
||||||
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
|
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
|
||||||
mHeaderPolicy.getSize(),
|
mHeaderPolicy.getSize(),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
|
mExpandableTrieBuffer(contentBuffers[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
||||||
mDictBuffer ? mDictBuffer->getBufferSize() : 0,
|
contentBufferSizes[Ver4DictConstants::TRIE_BUFFER_INDEX],
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mTerminalPositionLookupTable(dictPath, isUpdatable),
|
mTerminalPositionLookupTable(
|
||||||
mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
|
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
|
||||||
mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
|
contentBufferSizes[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX],
|
||||||
mShortcutDictContent(dictPath, isUpdatable),
|
isUpdatable),
|
||||||
|
mProbabilityDictContent(
|
||||||
|
contentBuffers[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
|
||||||
|
contentBufferSizes[Ver4DictConstants::PROBABILITY_BUFFER_INDEX],
|
||||||
|
mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
|
||||||
|
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
||||||
|
&contentBufferSizes[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
||||||
|
mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
|
||||||
|
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX],
|
||||||
|
&contentBufferSizes[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX], isUpdatable),
|
||||||
mIsUpdatable(isUpdatable) {}
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#ifndef LATINIME_VER4_DICT_BUFFER_H
|
#ifndef LATINIME_VER4_DICT_BUFFER_H
|
||||||
#define LATINIME_VER4_DICT_BUFFER_H
|
#define LATINIME_VER4_DICT_BUFFER_H
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
@ -36,7 +37,7 @@ class Ver4DictBuffers {
|
||||||
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
|
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
|
||||||
|
|
||||||
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
||||||
MmappedBuffer::MmappedBufferPtr headerBuffer,
|
MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||||
|
|
||||||
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
||||||
|
@ -45,9 +46,7 @@ class Ver4DictBuffers {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isValid() const {
|
AK_FORCE_INLINE bool isValid() const {
|
||||||
return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
|
return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid();
|
||||||
&& mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
|
|
||||||
&& mBigramDictContent.isValid() && mShortcutDictContent.isValid();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isNearSizeLimit() const {
|
AK_FORCE_INLINE bool isNearSizeLimit() const {
|
||||||
|
@ -120,12 +119,16 @@ class Ver4DictBuffers {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||||
|
|
||||||
Ver4DictBuffers(const char *const dictDirPath,
|
Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
|
MmappedBuffer::MmappedBufferPtr &&bodyBuffer,
|
||||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
const bool isUpdatable, const FormatUtils::FORMAT_VERSION formatVersion,
|
||||||
|
const std::vector<uint8_t *> &contentBuffers,
|
||||||
|
const std::vector<int> &contentBufferSizes);
|
||||||
|
|
||||||
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
||||||
|
|
||||||
|
bool flushDictBuffers(FILE *const file) const;
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
|
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
|
|
|
@ -18,19 +18,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// These values MUST match the definitions in FormatSpec.java.
|
const char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body";
|
||||||
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
|
|
||||||
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
|
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
|
||||||
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
|
|
||||||
// tat = Terminal Address Table
|
|
||||||
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
|
|
||||||
const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
|
|
||||||
const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
|
|
||||||
const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
|
|
||||||
const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
|
|
||||||
const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
|
|
||||||
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
|
||||||
".shortcut_index_shortcut";
|
|
||||||
|
|
||||||
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
|
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
|
||||||
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
|
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
|
||||||
|
@ -38,6 +27,21 @@ const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
|
||||||
// limited to 1MB to prevent from inefficient traversing.
|
// limited to 1MB to prevent from inefficient traversing.
|
||||||
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
|
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
|
||||||
|
|
||||||
|
// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie, TerminalAddressLookupTable and Probability.
|
||||||
|
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
|
||||||
|
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
|
||||||
|
NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 3
|
||||||
|
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
|
||||||
|
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
|
||||||
|
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
|
||||||
|
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
|
const int Ver4DictConstants::PROBABILITY_BUFFER_INDEX =
|
||||||
|
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
|
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
|
||||||
|
PROBABILITY_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
|
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
|
||||||
|
BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
|
||||||
|
|
||||||
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||||
|
@ -67,4 +71,7 @@ const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||||
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
|
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
|
||||||
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
||||||
|
|
||||||
|
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
|
||||||
|
const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -19,26 +19,26 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// TODO: Create PtConstants under the pt_common and move some constant values there.
|
// TODO: Create PtConstants under the pt_common and move some constant values there.
|
||||||
// Note that there are corresponding definitions in FormatSpec.java.
|
// Note that there are corresponding definitions in FormatSpec.java.
|
||||||
class Ver4DictConstants {
|
class Ver4DictConstants {
|
||||||
public:
|
public:
|
||||||
static const char *const TRIE_FILE_EXTENSION;
|
static const char *const BODY_FILE_EXTENSION;
|
||||||
static const char *const HEADER_FILE_EXTENSION;
|
static const char *const HEADER_FILE_EXTENSION;
|
||||||
static const char *const FREQ_FILE_EXTENSION;
|
|
||||||
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
|
|
||||||
static const char *const BIGRAM_FILE_EXTENSION;
|
|
||||||
static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
|
|
||||||
static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
|
|
||||||
static const char *const SHORTCUT_FILE_EXTENSION;
|
|
||||||
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
|
||||||
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
|
||||||
|
|
||||||
static const int MAX_DICTIONARY_SIZE;
|
static const int MAX_DICTIONARY_SIZE;
|
||||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||||
|
|
||||||
|
static const size_t NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE;
|
||||||
|
static const int TRIE_BUFFER_INDEX;
|
||||||
|
static const int TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX;
|
||||||
|
static const int PROBABILITY_BUFFER_INDEX;
|
||||||
|
static const int BIGRAM_BUFFERS_INDEX;
|
||||||
|
static const int SHORTCUT_BUFFERS_INDEX;
|
||||||
|
|
||||||
static const int NOT_A_TERMINAL_ID;
|
static const int NOT_A_TERMINAL_ID;
|
||||||
static const int PROBABILITY_SIZE;
|
static const int PROBABILITY_SIZE;
|
||||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
|
@ -68,6 +68,9 @@ class Ver4DictConstants {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||||
|
|
||||||
|
static const size_t NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
|
static const size_t NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
|
#endif /* LATINIME_VER4_DICT_CONSTANTS_H */
|
||||||
|
|
|
@ -34,6 +34,8 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
|
const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = ".tmp";
|
||||||
|
// Enough size to describe buffer size.
|
||||||
|
const int DictFileWritingUtils::SIZE_OF_BUFFER_SIZE_FIELD = 4;
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
|
||||||
const int dictVersion, const std::vector<int> localeAsCodePointVector,
|
const int dictVersion, const std::vector<int> localeAsCodePointVector,
|
||||||
|
@ -85,6 +87,18 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
|
||||||
return flushBufferToFile(filePath, buffer);
|
return flushBufferToFile(filePath, buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ bool DictFileWritingUtils::writeBufferToFileTail(FILE *const file,
|
||||||
|
const BufferWithExtendableBuffer *const buffer) {
|
||||||
|
uint8_t bufferSize[SIZE_OF_BUFFER_SIZE_FIELD];
|
||||||
|
int writingPos = 0;
|
||||||
|
ByteArrayUtils::writeUintAndAdvancePosition(bufferSize, buffer->getTailPosition(),
|
||||||
|
SIZE_OF_BUFFER_SIZE_FIELD, &writingPos);
|
||||||
|
if (fwrite(bufferSize, SIZE_OF_BUFFER_SIZE_FIELD, 1 /* count */, file) < 1) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return writeBufferToFile(file, buffer);
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::flushBufferToFile(const char *const filePath,
|
||||||
const BufferWithExtendableBuffer *const buffer) {
|
const BufferWithExtendableBuffer *const buffer) {
|
||||||
const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
|
const int fd = open(filePath, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
|
||||||
|
@ -100,6 +114,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!writeBufferToFile(file, buffer)) {
|
if (!writeBufferToFile(file, buffer)) {
|
||||||
|
fclose(file);
|
||||||
remove(filePath);
|
remove(filePath);
|
||||||
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
|
AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath,
|
||||||
buffer->getTailPosition());
|
buffer->getTailPosition());
|
||||||
|
@ -110,20 +125,17 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This closes file pointer when an error is caused and returns whether the writing was succeeded
|
// Returns whether the writing was succeeded or not.
|
||||||
// or not.
|
|
||||||
/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file,
|
/* static */ bool DictFileWritingUtils::writeBufferToFile(FILE *const file,
|
||||||
const BufferWithExtendableBuffer *const buffer) {
|
const BufferWithExtendableBuffer *const buffer) {
|
||||||
const int originalBufSize = buffer->getOriginalBufferSize();
|
const int originalBufSize = buffer->getOriginalBufferSize();
|
||||||
if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */),
|
if (originalBufSize > 0 && fwrite(buffer->getBuffer(false /* usesAdditionalBuffer */),
|
||||||
originalBufSize, 1, file) < 1) {
|
originalBufSize, 1, file) < 1) {
|
||||||
fclose(file);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
|
const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
|
||||||
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
|
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
|
||||||
additionalBufSize, 1, file) < 1) {
|
additionalBufSize, 1, file) < 1) {
|
||||||
fclose(file);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -38,9 +38,14 @@ class DictFileWritingUtils {
|
||||||
static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
|
static bool flushBufferToFileWithSuffix(const char *const basePath, const char *const suffix,
|
||||||
const BufferWithExtendableBuffer *const buffer);
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
|
|
||||||
|
static bool writeBufferToFileTail(FILE *const file,
|
||||||
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
||||||
|
|
||||||
|
static const int SIZE_OF_BUFFER_SIZE_FIELD;
|
||||||
|
|
||||||
static bool createEmptyV401DictFile(const char *const filePath,
|
static bool createEmptyV401DictFile(const char *const filePath,
|
||||||
const std::vector<int> localeAsCodePointVector,
|
const std::vector<int> localeAsCodePointVector,
|
||||||
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
|
const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap,
|
||||||
|
|
Loading…
Reference in a new issue