am c3a4c075: Merge "Create empty ver4 dictionary buffer on memory."
* commit 'c3a4c075c97affb47fbceab6ce68172c65bcdbfd': Create empty ver4 dictionary buffer on memory.main
commit
eb2f109bb5
|
@ -86,10 +86,10 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
|
||||||
char sourceDirChars[sourceDirUtf8Length + 1];
|
char sourceDirChars[sourceDirUtf8Length + 1];
|
||||||
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
|
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
|
||||||
sourceDirChars[sourceDirUtf8Length] = '\0';
|
sourceDirChars[sourceDirUtf8Length] = '\0';
|
||||||
DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy(
|
DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy =
|
||||||
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
|
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
|
||||||
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
|
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
|
||||||
isUpdatable == JNI_TRUE));
|
isUpdatable == JNI_TRUE);
|
||||||
if (!dictionaryStructureWithBufferPolicy.get()) {
|
if (!dictionaryStructureWithBufferPolicy.get()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,8 +35,8 @@ namespace latinime {
|
||||||
const int bufOffset, const int size, const bool isUpdatable) {
|
const int bufOffset, const int size, const bool isUpdatable) {
|
||||||
// Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
|
// Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
|
||||||
// MmappedBufferWrapper if the instance has the responsibility.
|
// MmappedBufferWrapper if the instance has the responsibility.
|
||||||
MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size,
|
MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
|
||||||
isUpdatable));
|
isUpdatable);
|
||||||
if (!mmappedBuffer.get()) {
|
if (!mmappedBuffer.get()) {
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
|
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
|
||||||
}
|
}
|
||||||
|
@ -58,8 +58,8 @@ namespace latinime {
|
||||||
}
|
}
|
||||||
// Removing extension to get the base path.
|
// Removing extension to get the base path.
|
||||||
dictDirPath.erase(pos);
|
dictDirPath.erase(pos);
|
||||||
const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
||||||
Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer));
|
Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer);
|
||||||
if (!dictBuffers.get()->isValid()) {
|
if (!dictBuffers.get()->isValid()) {
|
||||||
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
|
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
|
|
|
@ -33,6 +33,10 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
BigramDictContent()
|
||||||
|
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
||||||
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
||||||
|
|
||||||
|
@ -56,7 +60,7 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
bool copyBigramList(const int bigramListPos, const int toPos);
|
bool copyBigramList(const int bigramListPos, const int toPos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent);
|
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
||||||
|
|
||||||
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
|
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
|
||||||
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
|
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
|
||||||
|
|
|
@ -31,6 +31,8 @@ class ProbabilityDictContent : public SingleDictContent {
|
||||||
: SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
|
: SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
|
||||||
isUpdatable) {}
|
isUpdatable) {}
|
||||||
|
|
||||||
|
ProbabilityDictContent() {}
|
||||||
|
|
||||||
int getProbability(const int terminalId) const {
|
int getProbability(const int terminalId) const {
|
||||||
if (terminalId < 0 || terminalId >= getSize()) {
|
if (terminalId < 0 || terminalId >= getSize()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -61,7 +63,7 @@ class ProbabilityDictContent : public SingleDictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
|
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
|
||||||
|
|
||||||
int getSize() const {
|
int getSize() const {
|
||||||
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
|
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
|
||||||
|
|
|
@ -33,6 +33,10 @@ class ShortcutDictContent : public SparseTableDictContent {
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
|
ShortcutDictContent()
|
||||||
|
: SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||||
|
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||||
|
|
||||||
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
||||||
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
|
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
|
||||||
int *const shortcutEntryPos) const {
|
int *const shortcutEntryPos) const {
|
||||||
|
@ -57,7 +61,7 @@ class ShortcutDictContent : public SparseTableDictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent);
|
DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
|
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
|
||||||
|
@ -31,12 +32,17 @@ class SingleDictContent : public DictContent {
|
||||||
: mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
|
: mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
|
||||||
mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
|
mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
|
||||||
mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
|
mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
|
mIsValid(mMmappedBuffer.get() != 0) {}
|
||||||
|
|
||||||
|
SingleDictContent()
|
||||||
|
: mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mIsValid(true) {}
|
||||||
|
|
||||||
virtual ~SingleDictContent() {}
|
virtual ~SingleDictContent() {}
|
||||||
|
|
||||||
virtual bool isValid() const {
|
virtual bool isValid() const {
|
||||||
return mMmappedBuffer.get() != 0;
|
return mIsValid;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -49,10 +55,11 @@ class SingleDictContent : public DictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent);
|
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||||
|
const bool mIsValid;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
|
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
||||||
|
@ -49,13 +50,22 @@ class SparseTableDictContent : public DictContent {
|
||||||
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
|
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
sparseTableBlockSize, sparseTableDataSize) {}
|
sparseTableBlockSize, sparseTableDataSize),
|
||||||
|
mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
|
||||||
|
&& mContentBuffer.get() != 0) {}
|
||||||
|
|
||||||
|
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||||
|
: mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0),
|
||||||
|
mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||||
|
sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
|
||||||
|
|
||||||
virtual ~SparseTableDictContent() {}
|
virtual ~SparseTableDictContent() {}
|
||||||
|
|
||||||
virtual bool isValid() const {
|
virtual bool isValid() const {
|
||||||
return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
|
return mIsValid;
|
||||||
&& mContentBuffer.get() != 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -78,7 +88,6 @@ class SparseTableDictContent : public DictContent {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
||||||
|
|
||||||
// TODO: Have sparse table.
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
|
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
|
||||||
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
|
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
|
||||||
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
|
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
|
||||||
|
@ -86,6 +95,7 @@ class SparseTableDictContent : public DictContent {
|
||||||
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
||||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||||
SparseTable mAddressLookupTable;
|
SparseTable mAddressLookupTable;
|
||||||
|
const bool mIsValid;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */
|
||||||
|
|
|
@ -38,6 +38,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
|
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
|
||||||
mHeaderRegionSize(headerRegionSize) {}
|
mHeaderRegionSize(headerRegionSize) {}
|
||||||
|
|
||||||
|
TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
|
||||||
|
|
||||||
int getTerminalPtNodePosition(const int terminalId) const {
|
int getTerminalPtNodePosition(const int terminalId) const {
|
||||||
if (terminalId < 0 || terminalId >= mSize) {
|
if (terminalId < 0 || terminalId >= mSize) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
@ -66,7 +68,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
|
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
|
||||||
|
|
||||||
int mSize;
|
int mSize;
|
||||||
const int mHeaderRegionSize;
|
const int mHeaderRegionSize;
|
||||||
|
|
|
@ -33,27 +33,30 @@ class Ver4DictBuffers {
|
||||||
public:
|
public:
|
||||||
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
|
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
|
||||||
|
|
||||||
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
||||||
const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
|
const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
|
||||||
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
|
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
|
||||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
|
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers() {
|
||||||
|
return Ver4DictBuffersPtr(new Ver4DictBuffers());
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isValid() const {
|
AK_FORCE_INLINE bool isValid() const {
|
||||||
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
|
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
|
||||||
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
|
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
|
||||||
&& mShortcutDictContent.isValid();
|
&& mShortcutDictContent.isValid();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE uint8_t *getRawDictBuffer() const {
|
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
|
||||||
return mDictBuffer.get()->getBuffer();
|
return &mExpandableHeaderBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRawDictBufferSize() const {
|
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
|
||||||
return mDictBuffer.get()->getBufferSize();
|
return &mExpandableTrieBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
|
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
|
||||||
return &mTerminalPositionLookupTable;
|
return &mTerminalPositionLookupTable;
|
||||||
}
|
}
|
||||||
|
@ -86,21 +89,41 @@ class Ver4DictBuffers {
|
||||||
return mIsUpdatable;
|
return mIsUpdatable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool flush(const char *const dictDirPath) {
|
||||||
|
// TODO: Implement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||||
|
|
||||||
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
|
||||||
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
|
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
|
||||||
: mDictBuffer(dictBuffer),
|
: mDictBuffer(dictBuffer),
|
||||||
// TODO: Quit using getHeaderSize.
|
mHeaderSize(HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
||||||
mTerminalPositionLookupTable(dictDirPath, isUpdatable,
|
mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderSize,
|
||||||
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
|
mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderSize,
|
||||||
|
dictBuffer.get()->getBufferSize() - mHeaderSize,
|
||||||
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||||
|
// TODO: Quit using header size.
|
||||||
|
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
|
||||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||||
mBigramDictContent(dictDirPath, isUpdatable),
|
mBigramDictContent(dictDirPath, isUpdatable),
|
||||||
mShortcutDictContent(dictDirPath, isUpdatable),
|
mShortcutDictContent(dictDirPath, isUpdatable),
|
||||||
mIsUpdatable(isUpdatable) {}
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE Ver4DictBuffers()
|
||||||
|
: mDictBuffer(0), mHeaderSize(0),
|
||||||
|
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
|
mTerminalPositionLookupTable(), mProbabilityDictContent(),
|
||||||
|
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
|
const int mHeaderSize;
|
||||||
|
BufferWithExtendableBuffer mExpandableHeaderBuffer;
|
||||||
|
BufferWithExtendableBuffer mExpandableTrieBuffer;
|
||||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||||
ProbabilityDictContent mProbabilityDictContent;
|
ProbabilityDictContent mProbabilityDictContent;
|
||||||
BigramDictContent mBigramDictContent;
|
BigramDictContent mBigramDictContent;
|
||||||
|
|
|
@ -30,6 +30,10 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
|
||||||
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
||||||
".shortcut_index_shortcut";
|
".shortcut_index_shortcut";
|
||||||
|
|
||||||
|
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
|
||||||
|
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
||||||
|
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
||||||
|
|
||||||
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||||
|
|
|
@ -34,6 +34,8 @@ class Ver4DictConstants {
|
||||||
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
||||||
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
||||||
|
|
||||||
|
static const int MAX_DICTIONARY_SIZE;
|
||||||
|
|
||||||
static const int NOT_A_TERMINAL_ID;
|
static const int NOT_A_TERMINAL_ID;
|
||||||
static const int PROBABILITY_SIZE;
|
static const int PROBABILITY_SIZE;
|
||||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
|
|
|
@ -28,14 +28,14 @@ namespace latinime {
|
||||||
|
|
||||||
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
|
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
|
||||||
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
|
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
|
||||||
|
@ -63,7 +63,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodePos(ptNodePos);
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
maxCodePointCount, outCodePoints, outUnigramProbability);
|
maxCodePointCount, outCodePoints, outUnigramProbability);
|
||||||
|
@ -71,7 +71,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
}
|
}
|
||||||
|
@ -135,12 +135,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
mDictBuffer.getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
||||||
|
@ -160,9 +160,9 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
mDictBuffer.getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
|
@ -192,9 +192,9 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
mDictBuffer.getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
|
|
|
@ -38,18 +38,17 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
|
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
|
||||||
: mBuffers(buffers),
|
: mBuffers(buffers),
|
||||||
mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4),
|
mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer(
|
||||||
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(),
|
false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
|
||||||
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(),
|
mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
|
||||||
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||||
mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
||||||
&mShortcutPolicy),
|
&mShortcutPolicy),
|
||||||
mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter,
|
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()) {};
|
mBigramCount(mHeaderPolicy.getBigramCount()) {};
|
||||||
|
@ -115,7 +114,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mDictBuffer;
|
BufferWithExtendableBuffer *const mDictBuffer;
|
||||||
Ver4BigramListPolicy mBigramPolicy;
|
Ver4BigramListPolicy mBigramPolicy;
|
||||||
Ver4ShortcutListPolicy mShortcutPolicy;
|
Ver4ShortcutListPolicy mShortcutPolicy;
|
||||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
|
|
||||||
|
@ -34,7 +35,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
case 3:
|
case 3:
|
||||||
return createEmptyV3DictFile(filePath, attributeMap);
|
return createEmptyV3DictFile(filePath, attributeMap);
|
||||||
case 4:
|
case 4:
|
||||||
// TODO: Support version 4 dictionary format.
|
return createEmptyV4DictFile(filePath, attributeMap);
|
||||||
return false;
|
return false;
|
||||||
default:
|
default:
|
||||||
// Only version 3 dictionary is supported for now.
|
// Only version 3 dictionary is supported for now.
|
||||||
|
@ -58,6 +59,20 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
|
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath,
|
||||||
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||||
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
|
||||||
|
HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
|
||||||
|
headerPolicy.writeHeaderToBuffer(dictBuffers.get()->getWritableHeaderBuffer(),
|
||||||
|
true /* updatesLastUpdatedTime */, true /* updatesLastDecayedTime */,
|
||||||
|
0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
|
||||||
|
dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return dictBuffers.get()->flush(filePath);
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
|
||||||
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
|
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
|
||||||
const int tmpFileNameBufSize = strlen(filePath)
|
const int tmpFileNameBufSize = strlen(filePath)
|
||||||
|
@ -69,21 +84,21 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
|
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
|
||||||
FILE *const file = fopen(tmpFileName, "wb");
|
FILE *const file = fopen(tmpFileName, "wb");
|
||||||
if (!file) {
|
if (!file) {
|
||||||
AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName);
|
AKLOGE("Dictionary file %s cannot be opened.", tmpFileName);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write the dictionary header.
|
// Write the dictionary header.
|
||||||
if (!writeBufferToFile(file, dictHeader)) {
|
if (!writeBufferToFile(file, dictHeader)) {
|
||||||
remove(tmpFileName);
|
remove(tmpFileName);
|
||||||
AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition());
|
AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition());
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write the dictionary body.
|
// Write the dictionary body.
|
||||||
if (!writeBufferToFile(file, dictBody)) {
|
if (!writeBufferToFile(file, dictBody)) {
|
||||||
remove(tmpFileName);
|
remove(tmpFileName);
|
||||||
AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition());
|
AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition());
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,9 @@ class DictFileWritingUtils {
|
||||||
static bool createEmptyV3DictFile(const char *const filePath,
|
static bool createEmptyV3DictFile(const char *const filePath,
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||||
|
|
||||||
|
static bool createEmptyV4DictFile(const char *const filePath,
|
||||||
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||||
|
|
||||||
static bool writeBufferToFile(FILE *const file,
|
static bool writeBufferToFile(FILE *const file,
|
||||||
const BufferWithExtendableBuffer *const buffer);
|
const BufferWithExtendableBuffer *const buffer);
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue