Consolidating dict buffers into BufferWithExtendableBuffer.

Bug: 6669677

Change-Id: Ied58c123b68199bddb91293ff5cf5d96691e1f6b
This commit is contained in:
Keisuke Kuroyanagi 2013-09-02 17:17:18 +09:00
parent fd3509ef69
commit 3e76487c6c
10 changed files with 121 additions and 128 deletions

View file

@@ -31,50 +31,48 @@ namespace latinime {
*/ */
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public: public:
DynamicBigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize, DynamicBigramListPolicy(const BufferWithExtendableBuffer *const buffer)
const ExtendableBuffer *const additionalBuffer) : mBuffer(buffer) {}
: mDictRoot(bigramsBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
~DynamicBigramListPolicy() {} ~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const { int *const pos) const {
const bool usesAdditionalBuffer = *pos >= mBufSize; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
const uint8_t *const buffer = (usesAdditionalBuffer) ? const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
mAdditionalBuffer->getBuffer() : mDictRoot;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos -= mBufSize; *pos -= mBuffer->getOriginalBufferSize();
} }
const BigramListReadingUtils::BigramFlags flags = const BigramListReadingUtils::BigramFlags flags =
BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos); BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
*outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer( *outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
buffer, flags, pos); buffer, flags, pos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*outBigramPos += mBufSize; *outBigramPos += mBuffer->getOriginalBufferSize();
} }
*outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags); *outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadingUtils::hasNext(flags); *outHasNext = BigramListReadingUtils::hasNext(flags);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos += mBufSize; *pos += mBuffer->getOriginalBufferSize();
} }
} }
void skipAllBigrams(int *const pos) const { void skipAllBigrams(int *const pos) const {
if (*pos >= mBufSize) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
*pos -= mBufSize; const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
BigramListReadingUtils::skipExistingBigrams(mAdditionalBuffer->getBuffer(), pos); if (usesAdditionalBuffer) {
*pos += mBufSize; *pos -= mBuffer->getOriginalBufferSize();
} else { }
BigramListReadingUtils::skipExistingBigrams(mDictRoot, pos); BigramListReadingUtils::skipExistingBigrams(buffer, pos);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
} }
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
const uint8_t *const mDictRoot; const BufferWithExtendableBuffer *const mBuffer;
const int mBufSize;
const ExtendableBuffer *const mAdditionalBuffer;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H #endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H

View file

@@ -25,10 +25,12 @@ namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) { const int maxCodePointCount, int *const outCodePoints) {
const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
const uint8_t *const dictBuf = const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot; int pos = nodePos;
int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos; if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos = const int parentPos =
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
@@ -48,10 +50,10 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
dictBuf, mFlags, &pos); dictBuf, mFlags, &pos);
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
mChildrenPos += mOriginalDictSize; mChildrenPos += mBuffer->getOriginalBufferSize();
} }
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
pos += mOriginalDictSize; pos += mBuffer->getOriginalBufferSize();
} }
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
mShortcutPos = pos; mShortcutPos = pos;

View file

@@ -25,9 +25,9 @@
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer;
class DictionaryBigramsStructurePolicy; class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy; class DictionaryShortcutsStructurePolicy;
class ExtendableBuffer;
/* /*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@@ -35,12 +35,10 @@ class ExtendableBuffer;
*/ */
class DynamicPatriciaTrieNodeReader { class DynamicPatriciaTrieNodeReader {
public: public:
DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize, DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const ExtendableBuffer *const extendableBuffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mDictRoot(dictRoot), mOriginalDictSize(originalDictSize), : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
@@ -124,10 +122,7 @@ class DynamicPatriciaTrieNodeReader {
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
// TODO: Consolidate mDictRoot. const BufferWithExtendableBuffer *const mBuffer;
const uint8_t *const mDictRoot;
const int mOriginalDictSize;
const ExtendableBuffer *const mExtendableBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mNodePos; int mNodePos;

View file

@@ -31,8 +31,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos()); readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
@@ -51,8 +51,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// This method traverses parent nodes from the terminal by following parent pointers; thus, // This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order. // node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount]; int reverseCodePoints[maxCodePointCount];
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read the terminal node and get its probability. // First, read the terminal node and get its probability.
readingHelper.initWithNodePos(nodePos); readingHelper.initWithNodePos(nodePos);
if (!readingHelper.isValidTerminalNode()) { if (!readingHelper.isValidTerminalNode()) {
@@ -94,8 +94,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
} }
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(getRootPosition()); readingHelper.initWithNodeArrayPos(getRootPosition());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
@@ -137,7 +137,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
@@ -150,7 +150,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
@@ -163,7 +163,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {

View file

@@ -17,8 +17,6 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
@@ -35,11 +33,11 @@ class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
: mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer), mBigramListPolicy(&mBufferWithExtendableBuffer),
mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {} mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
~DynamicPatriciaTriePolicy() { ~DynamicPatriciaTriePolicy() {
delete mBuffer; delete mBuffer;
@@ -89,12 +87,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
const MmappedBuffer *const mBuffer; const MmappedBuffer *const mBuffer;
const ExtendableBuffer mExtendableBuffer;
const HeaderPolicy mHeaderPolicy; const HeaderPolicy mHeaderPolicy;
// TODO: Consolidate mDictRoot. const BufferWithExtendableBuffer mBufferWithExtendableBuffer;
// CAVEAT!: Be careful about array out of bound access with mDictRoot
const uint8_t *const mDictRoot;
const int mOriginalDictSize;
const DynamicBigramListPolicy mBigramListPolicy; const DynamicBigramListPolicy mBigramListPolicy;
const DynamicShortcutListPolicy mShortcutListPolicy; const DynamicShortcutListPolicy mShortcutListPolicy;
}; };

View file

@@ -25,16 +25,15 @@ const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINI
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this // Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop. // method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextNodeArray() { void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
const uint8_t *const dictBuf = (usesAdditionalBuffer) const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
? mExtendableBuffer->getBuffer() : mDictRoot;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos -= mOriginalDictSize; mPos -= mBuffer->getOriginalBufferSize();
} }
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
&mPos); &mPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos += mOriginalDictSize; mPos += mBuffer->getOriginalBufferSize();
} }
// Count up nodes and node arrays to avoid infinite loop. // Count up nodes and node arrays to avoid infinite loop.
mTotalNodeCount += mNodeCount; mTotalNodeCount += mNodeCount;
@@ -59,16 +58,15 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
// Follow the forward link and read the next node array if exists. // Follow the forward link and read the next node array if exists.
void DynamicPatriciaTrieReadingHelper::followForwardLink() { void DynamicPatriciaTrieReadingHelper::followForwardLink() {
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
const uint8_t *const dictBuf = (usesAdditionalBuffer) const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
? mExtendableBuffer->getBuffer() : mDictRoot;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos -= mOriginalDictSize; mPos -= mBuffer->getOriginalBufferSize();
} }
const int forwardLinkPosition = const int forwardLinkPosition =
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos); DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos += mOriginalDictSize; mPos += mBuffer->getOriginalBufferSize();
} }
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link. // Follow the forward link.

View file

@@ -31,15 +31,12 @@ namespace latinime {
*/ */
class DynamicPatriciaTrieReadingHelper { class DynamicPatriciaTrieReadingHelper {
public: public:
DynamicPatriciaTrieReadingHelper(const uint8_t *const dictRoot, const int originalDictSize, DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
const ExtendableBuffer *const extendableBuffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
mTotalNodeCount(0), mNodeArrayCount(0), mDictRoot(dictRoot), mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer),
mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
mNodeReader(mDictRoot, mOriginalDictSize, mExtendableBuffer, bigramsPolicy,
shortcutsPolicy) {}
~DynamicPatriciaTrieReadingHelper() {} ~DynamicPatriciaTrieReadingHelper() {}
@@ -177,9 +174,7 @@ class DynamicPatriciaTrieReadingHelper {
int mPrevTotalCodePointCount; int mPrevTotalCodePointCount;
int mTotalNodeCount; int mTotalNodeCount;
int mNodeArrayCount; int mNodeArrayCount;
const uint8_t *const mDictRoot; const BufferWithExtendableBuffer *const mBuffer;
const int mOriginalDictSize;
const ExtendableBuffer *const mExtendableBuffer;
DynamicPatriciaTrieNodeReader mNodeReader; DynamicPatriciaTrieNodeReader mNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH]; int mMergedNodeCodePoints[MAX_WORD_LENGTH];

View file

@@ -31,9 +31,8 @@ namespace latinime {
*/ */
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy { class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public: public:
DynamicShortcutListPolicy(const uint8_t *const shortcutBuf, const int bufSize, explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
const ExtendableBuffer *const additionalBuffer) : mBuffer(buffer) {}
: mShortcutsBuf(shortcutBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
~DynamicShortcutListPolicy() {} ~DynamicShortcutListPolicy() {}
@@ -47,11 +46,10 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
int *const pos) const { int *const pos) const {
const bool usesAdditionalBuffer = *pos >= mBufSize; const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
const uint8_t *const buffer = usesAdditionalBuffer const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
? mAdditionalBuffer->getBuffer() : mShortcutsBuf;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos -= mBufSize; *pos -= mBuffer->getOriginalBufferSize();
} }
const ShortcutListReadingUtils::ShortcutFlags flags = const ShortcutListReadingUtils::ShortcutFlags flags =
ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos); ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
@@ -66,29 +64,28 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
buffer, maxCodePointCount, outCodePoint, pos); buffer, maxCodePointCount, outCodePoint, pos);
} }
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos += mBufSize; *pos += mBuffer->getOriginalBufferSize();
} }
} }
void skipAllShortcuts(int *const pos) const { void skipAllShortcuts(int *const pos) const {
if (*pos >= mBufSize) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
*pos -= mBufSize; const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
const int shortcutListSize = ShortcutListReadingUtils if (usesAdditionalBuffer) {
::getShortcutListSizeAndForwardPointer(mAdditionalBuffer->getBuffer(), pos); *pos -= mBuffer->getOriginalBufferSize();
*pos += mBufSize + shortcutListSize; }
} else { const int shortcutListSize = ShortcutListReadingUtils
const int shortcutListSize = ShortcutListReadingUtils ::getShortcutListSizeAndForwardPointer(buffer, pos);
::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos); *pos += shortcutListSize;
*pos += shortcutListSize; if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
} }
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
const uint8_t *const mShortcutsBuf; const BufferWithExtendableBuffer *const mBuffer;
const int mBufSize;
const ExtendableBuffer *const mAdditionalBuffer;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H #endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H

View file

@@ -18,8 +18,8 @@
namespace latinime { namespace latinime {
const size_t ExtendableBuffer::INITIAL_BUFFER_SIZE = 16 * 1024; const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
const size_t ExtendableBuffer::MAX_BUFFER_SIZE = 1024 * 1024; const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
const size_t ExtendableBuffer::EXTEND_BUFFER_SIZE_STEP = 16 * 1024; const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
} }

View file

@@ -14,57 +14,71 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef LATINIME_EXTENDABLE_BUFFER_H #ifndef LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
#define LATINIME_EXTENDABLE_BUFFER_H #define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
#include <cstddef> #include <cstddef>
#include <stdint.h> #include <stdint.h>
#include <vector> #include <vector>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime { namespace latinime {
// TODO: change file name according to the class name.
// This is used as a buffer that can be extended for updatable dictionaries. // This is used as a buffer that can be extended for updatable dictionaries.
class ExtendableBuffer { // To optimize performance, raw pointer is directly used for reading buffer. The position has to be
// adjusted to access additional buffer. On the other hand, this class does not provide writable
// raw pointer but provides several methods that handle boundary checking for writing data.
class BufferWithExtendableBuffer {
public: public:
ExtendableBuffer() : mBuffer(INITIAL_BUFFER_SIZE), mUsedSize(0) {} BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
AK_FORCE_INLINE const uint8_t *getBuffer() const { /**
return &mBuffer[0]; * For reading.
*/
AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
return position >= mOriginalBufferSize;
} }
// CAVEAT!: Be careful about array out of bound access with buffers
AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
if (usesAdditionalBuffer) {
return &mAdditionalBuffer[0];
} else {
return mOriginalBuffer;
}
}
AK_FORCE_INLINE int getOriginalBufferSize() const {
return mOriginalBufferSize;
}
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
uint8_t *const mOriginalBuffer;
const int mOriginalBufferSize;
std::vector<uint8_t> mAdditionalBuffer;
int mUsedAdditionalBufferSize;
// Return if the buffer is successfully extended or not. // Return if the buffer is successfully extended or not.
AK_FORCE_INLINE bool extendBuffer() { AK_FORCE_INLINE bool extendBuffer() {
if (mBuffer.size() + EXTEND_BUFFER_SIZE_STEP > MAX_BUFFER_SIZE) { if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
> MAX_ADDITIONAL_BUFFER_SIZE) {
return false; return false;
} }
mBuffer.resize(mBuffer.size() + EXTEND_BUFFER_SIZE_STEP); mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
return true; return true;
} }
AK_FORCE_INLINE int getAllocatedSize() const {
return mBuffer.size();
}
AK_FORCE_INLINE int getUsedSize() const {
return mUsedSize;
}
AK_FORCE_INLINE void clear() {
mUsedSize = 0;
mBuffer.clear();
}
private:
DISALLOW_COPY_AND_ASSIGN(ExtendableBuffer);
static const size_t INITIAL_BUFFER_SIZE;
static const size_t MAX_BUFFER_SIZE;
static const size_t EXTEND_BUFFER_SIZE_STEP;
std::vector<uint8_t> mBuffer;
int mUsedSize;
}; };
} }
#endif /* LATINIME_MMAPED_BUFFER_H */ #endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */