am c3a4c075: Merge "Create empty ver4 dictionary buffer on memory."

* commit 'c3a4c075c97affb47fbceab6ce68172c65bcdbfd':
  Create empty ver4 dictionary buffer on memory.
main
Keisuke Kuroyanagi 2013-11-13 00:21:16 -08:00 committed by Android Git Automerger
commit eb2f109bb5
15 changed files with 125 additions and 50 deletions

View File

@ -86,10 +86,10 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring s
char sourceDirChars[sourceDirUtf8Length + 1]; char sourceDirChars[sourceDirUtf8Length + 1];
env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
sourceDirChars[sourceDirUtf8Length] = '\0'; sourceDirChars[sourceDirUtf8Length] = '\0';
DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy( DictionaryStructureWithBufferPolicy::StructurePoilcyPtr dictionaryStructureWithBufferPolicy =
DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy( DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
isUpdatable == JNI_TRUE)); isUpdatable == JNI_TRUE);
if (!dictionaryStructureWithBufferPolicy.get()) { if (!dictionaryStructureWithBufferPolicy.get()) {
return 0; return 0;
} }

View File

@ -35,8 +35,8 @@ namespace latinime {
const int bufOffset, const int size, const bool isUpdatable) { const int bufOffset, const int size, const bool isUpdatable) {
// Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of // Allocated buffer in MmapedBuffer::newBuffer() will be freed in the destructor of
// MmappedBufferWrapper if the instance has the responsibility. // MmappedBufferWrapper if the instance has the responsibility.
MmappedBuffer::MmappedBufferPtr mmappedBuffer(MmappedBuffer::openBuffer(path, bufOffset, size, MmappedBuffer::MmappedBufferPtr mmappedBuffer = MmappedBuffer::openBuffer(path, bufOffset, size,
isUpdatable)); isUpdatable);
if (!mmappedBuffer.get()) { if (!mmappedBuffer.get()) {
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
} }
@ -58,8 +58,8 @@ namespace latinime {
} }
// Removing extension to get the base path. // Removing extension to get the base path.
dictDirPath.erase(pos); dictDirPath.erase(pos);
const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer)); Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer);
if (!dictBuffers.get()->isValid()) { if (!dictBuffers.get()->isValid()) {
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements."); AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
ASSERT(false); ASSERT(false);

View File

@ -33,6 +33,10 @@ class BigramDictContent : public SparseTableDictContent {
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
// Constructor that takes no dictionary path. It forwards only the bigram address table
// block size and data size to the two-argument SparseTableDictContent constructor.
// NOTE(review): presumably used when building an empty dictionary in memory (it is the
// constructor invoked by the no-argument Ver4DictBuffers constructor) - confirm.
BigramDictContent()
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
int *const outTargetTerminalId, int *const bigramEntryPos) const; int *const outTargetTerminalId, int *const bigramEntryPos) const;
@ -56,7 +60,7 @@ class BigramDictContent : public SparseTableDictContent {
bool copyBigramList(const int bigramListPos, const int toPos); bool copyBigramList(const int bigramListPos, const int toPos);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
int createAndGetBigramFlags(const int probability, const bool hasNext) const { int createAndGetBigramFlags(const int probability, const bool hasNext) const {
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)

View File

@ -31,6 +31,8 @@ class ProbabilityDictContent : public SingleDictContent {
: SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION, : SingleDictContent(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION,
isUpdatable) {} isUpdatable) {}
// Constructor that takes no dictionary path; the SingleDictContent base is
// default-constructed implicitly and no members are initialized here.
ProbabilityDictContent() {}
int getProbability(const int terminalId) const { int getProbability(const int terminalId) const {
if (terminalId < 0 || terminalId >= getSize()) { if (terminalId < 0 || terminalId >= getSize()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -61,7 +63,7 @@ class ProbabilityDictContent : public SingleDictContent {
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
int getSize() const { int getSize() const {
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE

View File

@ -33,6 +33,10 @@ class ShortcutDictContent : public SparseTableDictContent {
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
// Constructor that takes no dictionary path. It forwards only the shortcut address table
// block size and data size to the two-argument SparseTableDictContent constructor.
// NOTE(review): presumably used when building an empty dictionary in memory (it is the
// constructor invoked by the no-argument Ver4DictBuffers constructor) - confirm.
ShortcutDictContent()
: SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount, void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags, int *const outCodePoint, int *const outCodePointCount, int *const outShortcutFlags,
int *const shortcutEntryPos) const { int *const shortcutEntryPos) const {
@ -57,7 +61,7 @@ class ShortcutDictContent : public SparseTableDictContent {
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ShortcutDictContent); DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */ #endif /* LATINIME_SHORTCUT_DICT_CONTENT_H */

View File

@ -19,6 +19,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -31,12 +32,17 @@ class SingleDictContent : public DictContent {
: mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)), : mMmappedBuffer(MmappedBuffer::openBuffer(dictDirPath, contentFileName, isUpdatable)),
mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0, mExpandableContentBuffer(mMmappedBuffer.get() ? mMmappedBuffer.get()->getBuffer() : 0,
mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0, mMmappedBuffer.get() ? mMmappedBuffer.get()->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mIsValid(mMmappedBuffer.get() != 0) {}
// Constructor for content that is not backed by a file: mMmappedBuffer holds a null
// pointer (0) and mExpandableContentBuffer is built from
// Ver4DictConstants::MAX_DICTIONARY_SIZE alone.
// mIsValid is set to true unconditionally, so isValid() reports true even though there is
// no mmapped buffer.
// NOTE(review): the single int passed to BufferWithExtendableBuffer is presumably the
// maximum size the buffer may grow to - confirm against that class.
SingleDictContent()
: mMmappedBuffer(0), mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mIsValid(true) {}
virtual ~SingleDictContent() {} virtual ~SingleDictContent() {}
virtual bool isValid() const { virtual bool isValid() const {
return mMmappedBuffer.get() != 0; return mIsValid;
} }
protected: protected:
@ -49,10 +55,11 @@ class SingleDictContent : public DictContent {
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SingleDictContent); DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer; BufferWithExtendableBuffer mExpandableContentBuffer;
const bool mIsValid;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_SINGLE_DICT_CONTENT_H */ #endif /* LATINIME_SINGLE_DICT_CONTENT_H */

View File

@ -19,6 +19,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h"
@ -49,13 +50,22 @@ class SparseTableDictContent : public DictContent {
mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0, mContentBuffer.get() ? mContentBuffer.get()->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize) {} sparseTableBlockSize, sparseTableDataSize),
mIsValid(mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0
&& mContentBuffer.get() != 0) {}
// Constructor for content that is not backed by files.
// All three mmapped buffer pointers (lookup table, address table, content) are set to
// null (0), and each of the three expandable buffers is built from
// Ver4DictConstants::MAX_DICTIONARY_SIZE alone.
// mAddressLookupTable is wired to the expandable lookup/address table buffers together
// with the given sparseTableBlockSize and sparseTableDataSize.
// mIsValid is set to true unconditionally, so isValid() reports true even though no
// mmapped buffers exist.
// NOTE(review): the single int passed to BufferWithExtendableBuffer is presumably the
// maximum size the buffer may grow to - confirm against that class.
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
: mLookupTableBuffer(0), mAddressTableBuffer(0), mContentBuffer(0),
mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
virtual ~SparseTableDictContent() {} virtual ~SparseTableDictContent() {}
virtual bool isValid() const { virtual bool isValid() const {
return mLookupTableBuffer.get() != 0 && mAddressTableBuffer.get() != 0 return mIsValid;
&& mContentBuffer.get() != 0;
} }
protected: protected:
@ -78,7 +88,6 @@ class SparseTableDictContent : public DictContent {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
// TODO: Have sparse table.
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
const MmappedBuffer::MmappedBufferPtr mContentBuffer; const MmappedBuffer::MmappedBufferPtr mContentBuffer;
@ -86,6 +95,7 @@ class SparseTableDictContent : public DictContent {
BufferWithExtendableBuffer mExpandableAddressTableBuffer; BufferWithExtendableBuffer mExpandableAddressTableBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer; BufferWithExtendableBuffer mExpandableContentBuffer;
SparseTable mAddressLookupTable; SparseTable mAddressLookupTable;
const bool mIsValid;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */ #endif /* LATINIME_SPARSE_TABLE_DICT_CONTENT_H */

View File

@ -38,6 +38,8 @@ class TerminalPositionLookupTable : public SingleDictContent {
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE), / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE),
mHeaderRegionSize(headerRegionSize) {} mHeaderRegionSize(headerRegionSize) {}
// Constructor that takes no dictionary path: the SingleDictContent base is
// default-constructed implicitly, and both the table size and the header region size
// start at 0.
TerminalPositionLookupTable() : mSize(0), mHeaderRegionSize(0) {}
int getTerminalPtNodePosition(const int terminalId) const { int getTerminalPtNodePosition(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) { if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
@ -66,7 +68,7 @@ class TerminalPositionLookupTable : public SingleDictContent {
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
int mSize; int mSize;
const int mHeaderRegionSize; const int mHeaderRegionSize;

View File

@ -33,27 +33,30 @@ class Ver4DictBuffers {
public: public:
typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr; typedef ExclusiveOwnershipPointer<Ver4DictBuffers> Ver4DictBuffersPtr;
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, static AK_FORCE_INLINE Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer) { const MmappedBuffer::MmappedBufferPtr &dictBuffer) {
const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false; const bool isUpdatable = dictBuffer.get() ? dictBuffer.get()->isUpdatable() : false;
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable)); return Ver4DictBuffersPtr(new Ver4DictBuffers(dictDirPath, dictBuffer, isUpdatable));
} }
// Allocates a Ver4DictBuffers with its no-argument constructor and returns it wrapped in
// a Ver4DictBuffersPtr.
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers() {
    Ver4DictBuffers *const newBuffers = new Ver4DictBuffers();
    return Ver4DictBuffersPtr(newBuffers);
}
AK_FORCE_INLINE bool isValid() const { AK_FORCE_INLINE bool isValid() const {
return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid() return mDictBuffer.get() != 0 && mProbabilityDictContent.isValid()
&& mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid() && mTerminalPositionLookupTable.isValid() && mBigramDictContent.isValid()
&& mShortcutDictContent.isValid(); && mShortcutDictContent.isValid();
} }
AK_FORCE_INLINE uint8_t *getRawDictBuffer() const { AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
return mDictBuffer.get()->getBuffer(); return &mExpandableHeaderBuffer;
} }
AK_FORCE_INLINE int getRawDictBufferSize() const { AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
return mDictBuffer.get()->getBufferSize(); return &mExpandableTrieBuffer;
} }
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() { AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable; return &mTerminalPositionLookupTable;
} }
@ -86,21 +89,41 @@ class Ver4DictBuffers {
return mIsUpdatable; return mIsUpdatable;
} }
// Stub: nothing is written yet, and the call always reports failure.
//
// @param dictDirPath currently unused.
// @return always false until the TODO below is addressed.
bool flush(const char *const dictDirPath) {
// TODO: Implement.
return false;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath, AK_FORCE_INLINE Ver4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable) const MmappedBuffer::MmappedBufferPtr &dictBuffer, const bool isUpdatable)
: mDictBuffer(dictBuffer), : mDictBuffer(dictBuffer),
// TODO: Quit using getHeaderSize. mHeaderSize(HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mExpandableHeaderBuffer(dictBuffer.get()->getBuffer(), mHeaderSize,
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableTrieBuffer(dictBuffer.get()->getBuffer() + mHeaderSize,
dictBuffer.get()->getBufferSize() - mHeaderSize,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
mProbabilityDictContent(dictDirPath, isUpdatable), mProbabilityDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {} mIsUpdatable(isUpdatable) {}
// No-argument constructor, used by createVer4DictBuffers().
// mDictBuffer holds a null pointer (0) and mHeaderSize is 0. The header and trie
// expandable buffers are each built from Ver4DictConstants::MAX_DICTIONARY_SIZE alone,
// the four content members use their own no-argument constructors, and the instance is
// marked updatable.
// NOTE(review): isValid() requires mDictBuffer.get() != 0, so an instance built this way
// reports isValid() == false - confirm that callers of createVer4DictBuffers() do not
// depend on isValid().
AK_FORCE_INLINE Ver4DictBuffers()
: mDictBuffer(0), mHeaderSize(0),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(), mProbabilityDictContent(),
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer; const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const int mHeaderSize;
BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable; TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent; ProbabilityDictContent mProbabilityDictContent;
BigramDictContent mBigramDictContent; BigramDictContent mBigramDictContent;

View File

@ -30,6 +30,10 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
".shortcut_index_shortcut"; ".shortcut_index_shortcut";
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
// The value defined below is 2 * 1024 * 1024 (2MB), i.e. a quarter of that limit.
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;

View File

@ -34,6 +34,8 @@ class Ver4DictConstants {
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
static const int MAX_DICTIONARY_SIZE;
static const int NOT_A_TERMINAL_ID; static const int NOT_A_TERMINAL_ID;
static const int PROBABILITY_SIZE; static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE;

View File

@ -28,14 +28,14 @@ namespace latinime {
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const { DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
@ -63,7 +63,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodePos(ptNodePos); readingHelper.initWithPtNodePos(ptNodePos);
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability); maxCodePointCount, outCodePoints, outUnigramProbability);
@ -71,7 +71,7 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
} }
@ -135,12 +135,12 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false; return false;
} }
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer.getTailPosition()); mDictBuffer->getTailPosition());
return false; return false;
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false; bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
@ -160,9 +160,9 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false; return false;
} }
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer.getTailPosition()); mDictBuffer->getTailPosition());
return false; return false;
} }
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
@ -192,9 +192,9 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false; return false;
} }
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer.getTailPosition()); mDictBuffer->getTailPosition());
return false; return false;
} }
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,

View File

@ -38,18 +38,17 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers) Ver4PatriciaTriePolicy(const Ver4DictBuffers::Ver4DictBuffersPtr &buffers)
: mBuffers(buffers), : mBuffers(buffers),
mHeaderPolicy(mBuffers.get()->getRawDictBuffer(), FormatUtils::VERSION_4), mHeaderPolicy(mBuffers.get()->getWritableHeaderBuffer()->getBuffer(
mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), false /* usesAdditionalBuffer*/), FormatUtils::VERSION_4),
mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), mDictBuffer(mBuffers.get()->getWritableTrieBuffer()),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(), mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()), mNodeReader(mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, mNodeWriter(mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy), &mShortcutPolicy),
mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter, mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()), mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()), mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()) {}; mBigramCount(mHeaderPolicy.getBigramCount()) {};
@ -115,7 +114,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy mHeaderPolicy; const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mDictBuffer; BufferWithExtendableBuffer *const mDictBuffer;
Ver4BigramListPolicy mBigramPolicy; Ver4BigramListPolicy mBigramPolicy;
Ver4ShortcutListPolicy mShortcutPolicy; Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader; Ver4PatriciaTrieNodeReader mNodeReader;

View File

@ -21,6 +21,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h"
@ -34,7 +35,7 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
case 3: case 3:
return createEmptyV3DictFile(filePath, attributeMap); return createEmptyV3DictFile(filePath, attributeMap);
case 4: case 4:
// TODO: Support version 4 dictionary format. return createEmptyV4DictFile(filePath, attributeMap);
return false; return false;
default: default:
// Only version 3 dictionary is supported for now. // Only version 3 dictionary is supported for now.
@ -58,6 +59,20 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
} }
// Builds an empty version 4 dictionary and asks the buffers to flush it to filePath.
//
// Steps: obtain buffers from Ver4DictBuffers::createVer4DictBuffers(), write a header
// built from attributeMap (with zero unigram/bigram counts and no extended region) into
// the header buffer, write an empty dictionary rooted at position 0 into the trie buffer,
// then flush.
//
// @return false if the empty trie could not be written; otherwise whatever
//         Ver4DictBuffers::flush() returns.
// NOTE(review): Ver4DictBuffers::flush() is currently a TODO stub that returns false, so
// this function cannot return true until that is implemented.
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath,
        const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
    Ver4DictBuffers::Ver4DictBuffersPtr emptyBuffers = Ver4DictBuffers::createVer4DictBuffers();
    Ver4DictBuffers *const buffers = emptyBuffers.get();

    HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap);
    BufferWithExtendableBuffer *const headerBuffer = buffers->getWritableHeaderBuffer();
    headerPolicy.writeHeaderToBuffer(headerBuffer, true /* updatesLastUpdatedTime */,
            true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
            0 /* extendedRegionSize */);

    BufferWithExtendableBuffer *const trieBuffer = buffers->getWritableTrieBuffer();
    const bool wroteEmptyTrie =
            DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(trieBuffer, 0 /* rootPos */);
    // Short-circuit: flush() is only attempted when the empty trie was written.
    return wroteEmptyTrie && buffers->flush(filePath);
}
/* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, /* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath,
BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) {
const int tmpFileNameBufSize = strlen(filePath) const int tmpFileNameBufSize = strlen(filePath)
@ -69,21 +84,21 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
FILE *const file = fopen(tmpFileName, "wb"); FILE *const file = fopen(tmpFileName, "wb");
if (!file) { if (!file) {
AKLOGE("Dictionary file %s cannnot be opened.", tmpFileName); AKLOGE("Dictionary file %s cannot be opened.", tmpFileName);
ASSERT(false); ASSERT(false);
return false; return false;
} }
// Write the dictionary header. // Write the dictionary header.
if (!writeBufferToFile(file, dictHeader)) { if (!writeBufferToFile(file, dictHeader)) {
remove(tmpFileName); remove(tmpFileName);
AKLOGE("Dictionary header cannnot be written. size: %d", dictHeader->getTailPosition()); AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition());
ASSERT(false); ASSERT(false);
return false; return false;
} }
// Write the dictionary body. // Write the dictionary body.
if (!writeBufferToFile(file, dictBody)) { if (!writeBufferToFile(file, dictBody)) {
remove(tmpFileName); remove(tmpFileName);
AKLOGE("Dictionary body cannnot be written. size: %d", dictBody->getTailPosition()); AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition());
ASSERT(false); ASSERT(false);
return false; return false;
} }

View File

@ -43,6 +43,9 @@ class DictFileWritingUtils {
static bool createEmptyV3DictFile(const char *const filePath, static bool createEmptyV3DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap); const HeaderReadWriteUtils::AttributeMap *const attributeMap);
// Creates an empty version 4 dictionary for filePath using the header attributes in
// attributeMap. Takes the same parameters as createEmptyV3DictFile().
// Returns false when the dictionary could not be written or flushed.
static bool createEmptyV4DictFile(const char *const filePath,
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
static bool writeBufferToFile(FILE *const file, static bool writeBufferToFile(FILE *const file,
const BufferWithExtendableBuffer *const buffer); const BufferWithExtendableBuffer *const buffer);
}; };