am 3e76487c: Consolidating dict buffers into BufferWithExtendableBuffer.
* commit '3e76487c6c95ccec49622b9d7e0b45efff97f937': Consolidating dict buffers into BufferWithExtendableBuffer.main
commit
cbbf25559d
|
@ -31,50 +31,48 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicBigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize,
|
DynamicBigramListPolicy(const BufferWithExtendableBuffer *const buffer)
|
||||||
const ExtendableBuffer *const additionalBuffer)
|
: mBuffer(buffer) {}
|
||||||
: mDictRoot(bigramsBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
|
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
const bool usesAdditionalBuffer = *pos >= mBufSize;
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
const uint8_t *const buffer = (usesAdditionalBuffer) ?
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
mAdditionalBuffer->getBuffer() : mDictRoot;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBufSize;
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const BigramListReadingUtils::BigramFlags flags =
|
const BigramListReadingUtils::BigramFlags flags =
|
||||||
BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
BigramListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
*outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
|
*outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
|
||||||
buffer, flags, pos);
|
buffer, flags, pos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*outBigramPos += mBufSize;
|
*outBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
*outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
|
||||||
*outHasNext = BigramListReadingUtils::hasNext(flags);
|
*outHasNext = BigramListReadingUtils::hasNext(flags);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBufSize;
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void skipAllBigrams(int *const pos) const {
|
void skipAllBigrams(int *const pos) const {
|
||||||
if (*pos >= mBufSize) {
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
*pos -= mBufSize;
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
BigramListReadingUtils::skipExistingBigrams(mAdditionalBuffer->getBuffer(), pos);
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBufSize;
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
} else {
|
}
|
||||||
BigramListReadingUtils::skipExistingBigrams(mDictRoot, pos);
|
BigramListReadingUtils::skipExistingBigrams(buffer, pos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||||
|
|
||||||
const uint8_t *const mDictRoot;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const int mBufSize;
|
|
||||||
const ExtendableBuffer *const mAdditionalBuffer;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||||
|
|
|
@ -25,10 +25,12 @@ namespace latinime {
|
||||||
|
|
||||||
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
||||||
const int maxCodePointCount, int *const outCodePoints) {
|
const int maxCodePointCount, int *const outCodePoints) {
|
||||||
const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize;
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
|
||||||
const uint8_t *const dictBuf =
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot;
|
int pos = nodePos;
|
||||||
int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos;
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
const int parentPos =
|
const int parentPos =
|
||||||
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
||||||
|
@ -48,10 +50,10 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
||||||
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||||
dictBuf, mFlags, &pos);
|
dictBuf, mFlags, &pos);
|
||||||
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
||||||
mChildrenPos += mOriginalDictSize;
|
mChildrenPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos += mOriginalDictSize;
|
pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
|
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
|
||||||
mShortcutPos = pos;
|
mShortcutPos = pos;
|
||||||
|
|
|
@ -25,9 +25,9 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
class DictionaryBigramsStructurePolicy;
|
class DictionaryBigramsStructurePolicy;
|
||||||
class DictionaryShortcutsStructurePolicy;
|
class DictionaryShortcutsStructurePolicy;
|
||||||
class ExtendableBuffer;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
||||||
|
@ -35,12 +35,10 @@ class ExtendableBuffer;
|
||||||
*/
|
*/
|
||||||
class DynamicPatriciaTrieNodeReader {
|
class DynamicPatriciaTrieNodeReader {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize,
|
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||||
const ExtendableBuffer *const extendableBuffer,
|
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mDictRoot(dictRoot), mOriginalDictSize(originalDictSize),
|
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
||||||
mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy),
|
|
||||||
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
||||||
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
|
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
|
||||||
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
||||||
|
@ -124,10 +122,7 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
|
||||||
|
|
||||||
// TODO: Consolidate mDictRoot.
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const uint8_t *const mDictRoot;
|
|
||||||
const int mOriginalDictSize;
|
|
||||||
const ExtendableBuffer *const mExtendableBuffer;
|
|
||||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
||||||
int mNodePos;
|
int mNodePos;
|
||||||
|
|
|
@ -31,8 +31,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
|
@ -51,8 +51,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||||
// node code points are stored in the buffer in the reverse order.
|
// node code points are stored in the buffer in the reverse order.
|
||||||
int reverseCodePoints[maxCodePointCount];
|
int reverseCodePoints[maxCodePointCount];
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
// First, read the terminal node and get its probability.
|
// First, read the terminal node and get its probability.
|
||||||
readingHelper.initWithNodePos(nodePos);
|
readingHelper.initWithNodePos(nodePos);
|
||||||
if (!readingHelper.isValidTerminalNode()) {
|
if (!readingHelper.isValidTerminalNode()) {
|
||||||
|
@ -94,8 +94,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
|
@ -137,7 +137,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||||
|
@ -150,7 +150,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
|
@ -163,7 +163,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
|
|
|
@ -17,8 +17,6 @@
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
@ -35,11 +33,11 @@ class DicNodeVector;
|
||||||
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer)
|
||||||
: mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()),
|
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
|
||||||
mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||||
mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer),
|
mBigramListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {}
|
mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
|
||||||
|
|
||||||
~DynamicPatriciaTriePolicy() {
|
~DynamicPatriciaTriePolicy() {
|
||||||
delete mBuffer;
|
delete mBuffer;
|
||||||
|
@ -89,12 +87,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const ExtendableBuffer mExtendableBuffer;
|
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
// TODO: Consolidate mDictRoot.
|
const BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||||
// CAVEAT!: Be careful about array out of bound access with mDictRoot
|
|
||||||
const uint8_t *const mDictRoot;
|
|
||||||
const int mOriginalDictSize;
|
|
||||||
const DynamicBigramListPolicy mBigramListPolicy;
|
const DynamicBigramListPolicy mBigramListPolicy;
|
||||||
const DynamicShortcutListPolicy mShortcutListPolicy;
|
const DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,16 +25,15 @@ const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINI
|
||||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||||
// method to avoid an infinite loop.
|
// method to avoid an infinite loop.
|
||||||
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||||
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
|
||||||
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
? mExtendableBuffer->getBuffer() : mDictRoot;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos -= mOriginalDictSize;
|
mPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
||||||
&mPos);
|
&mPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos += mOriginalDictSize;
|
mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
// Count up nodes and node arrays to avoid infinite loop.
|
// Count up nodes and node arrays to avoid infinite loop.
|
||||||
mTotalNodeCount += mNodeCount;
|
mTotalNodeCount += mNodeCount;
|
||||||
|
@ -59,16 +58,15 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||||
|
|
||||||
// Follow the forward link and read the next node array if exists.
|
// Follow the forward link and read the next node array if exists.
|
||||||
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||||
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
|
||||||
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
? mExtendableBuffer->getBuffer() : mDictRoot;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos -= mOriginalDictSize;
|
mPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const int forwardLinkPosition =
|
const int forwardLinkPosition =
|
||||||
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
|
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos += mOriginalDictSize;
|
mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||||
// Follow the forward link.
|
// Follow the forward link.
|
||||||
|
|
|
@ -31,15 +31,12 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class DynamicPatriciaTrieReadingHelper {
|
class DynamicPatriciaTrieReadingHelper {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTrieReadingHelper(const uint8_t *const dictRoot, const int originalDictSize,
|
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
|
||||||
const ExtendableBuffer *const extendableBuffer,
|
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||||
mTotalNodeCount(0), mNodeArrayCount(0), mDictRoot(dictRoot),
|
mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer),
|
||||||
mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer),
|
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
|
||||||
mNodeReader(mDictRoot, mOriginalDictSize, mExtendableBuffer, bigramsPolicy,
|
|
||||||
shortcutsPolicy) {}
|
|
||||||
|
|
||||||
~DynamicPatriciaTrieReadingHelper() {}
|
~DynamicPatriciaTrieReadingHelper() {}
|
||||||
|
|
||||||
|
@ -177,9 +174,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
int mPrevTotalCodePointCount;
|
int mPrevTotalCodePointCount;
|
||||||
int mTotalNodeCount;
|
int mTotalNodeCount;
|
||||||
int mNodeArrayCount;
|
int mNodeArrayCount;
|
||||||
const uint8_t *const mDictRoot;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const int mOriginalDictSize;
|
|
||||||
const ExtendableBuffer *const mExtendableBuffer;
|
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
|
||||||
|
|
|
@ -31,9 +31,8 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicShortcutListPolicy(const uint8_t *const shortcutBuf, const int bufSize,
|
explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
|
||||||
const ExtendableBuffer *const additionalBuffer)
|
: mBuffer(buffer) {}
|
||||||
: mShortcutsBuf(shortcutBuf), mBufSize(bufSize), mAdditionalBuffer(additionalBuffer) {}
|
|
||||||
|
|
||||||
~DynamicShortcutListPolicy() {}
|
~DynamicShortcutListPolicy() {}
|
||||||
|
|
||||||
|
@ -47,11 +46,10 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||||
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
||||||
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
const bool usesAdditionalBuffer = *pos >= mBufSize;
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
const uint8_t *const buffer = usesAdditionalBuffer
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
? mAdditionalBuffer->getBuffer() : mShortcutsBuf;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBufSize;
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const ShortcutListReadingUtils::ShortcutFlags flags =
|
const ShortcutListReadingUtils::ShortcutFlags flags =
|
||||||
ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
|
@ -66,29 +64,28 @@ class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||||
buffer, maxCodePointCount, outCodePoint, pos);
|
buffer, maxCodePointCount, outCodePoint, pos);
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBufSize;
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void skipAllShortcuts(int *const pos) const {
|
void skipAllShortcuts(int *const pos) const {
|
||||||
if (*pos >= mBufSize) {
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
*pos -= mBufSize;
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
const int shortcutListSize = ShortcutListReadingUtils
|
const int shortcutListSize = ShortcutListReadingUtils
|
||||||
::getShortcutListSizeAndForwardPointer(mAdditionalBuffer->getBuffer(), pos);
|
::getShortcutListSizeAndForwardPointer(buffer, pos);
|
||||||
*pos += mBufSize + shortcutListSize;
|
|
||||||
} else {
|
|
||||||
const int shortcutListSize = ShortcutListReadingUtils
|
|
||||||
::getShortcutListSizeAndForwardPointer(mShortcutsBuf, pos);
|
|
||||||
*pos += shortcutListSize;
|
*pos += shortcutListSize;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
|
||||||
|
|
||||||
const uint8_t *const mShortcutsBuf;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const int mBufSize;
|
|
||||||
const ExtendableBuffer *const mAdditionalBuffer;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
|
||||||
|
|
|
@ -18,8 +18,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const size_t ExtendableBuffer::INITIAL_BUFFER_SIZE = 16 * 1024;
|
const size_t BufferWithExtendableBuffer::INITIAL_ADDITIONAL_BUFFER_SIZE = 16 * 1024;
|
||||||
const size_t ExtendableBuffer::MAX_BUFFER_SIZE = 1024 * 1024;
|
const size_t BufferWithExtendableBuffer::MAX_ADDITIONAL_BUFFER_SIZE = 1024 * 1024;
|
||||||
const size_t ExtendableBuffer::EXTEND_BUFFER_SIZE_STEP = 16 * 1024;
|
const size_t BufferWithExtendableBuffer::EXTEND_ADDITIONAL_BUFFER_SIZE_STEP = 16 * 1024;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,57 +14,71 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LATINIME_EXTENDABLE_BUFFER_H
|
#ifndef LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
|
||||||
#define LATINIME_EXTENDABLE_BUFFER_H
|
#define LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
// TODO: change file name according to the class name.
|
||||||
// This is used as a buffer that can be extended for updatable dictionaries.
|
// This is used as a buffer that can be extended for updatable dictionaries.
|
||||||
class ExtendableBuffer {
|
// To optimize performance, raw pointer is directly used for reading buffer. The position has to be
|
||||||
|
// adjusted to access additional buffer. On the other hand, this class does not provide writable
|
||||||
|
// raw pointer but provides several methods that handle boundary checking for writing data.
|
||||||
|
class BufferWithExtendableBuffer {
|
||||||
public:
|
public:
|
||||||
ExtendableBuffer() : mBuffer(INITIAL_BUFFER_SIZE), mUsedSize(0) {}
|
BufferWithExtendableBuffer(uint8_t *const originalBuffer, const int originalBufferSize)
|
||||||
|
: mOriginalBuffer(originalBuffer), mOriginalBufferSize(originalBufferSize),
|
||||||
|
mAdditionalBuffer(INITIAL_ADDITIONAL_BUFFER_SIZE), mUsedAdditionalBufferSize(0) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE const uint8_t *getBuffer() const {
|
/**
|
||||||
return &mBuffer[0];
|
* For reading.
|
||||||
|
*/
|
||||||
|
AK_FORCE_INLINE bool isInAdditionalBuffer(const int position) const {
|
||||||
|
return position >= mOriginalBufferSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CAVEAT!: Be careful about array out of bound access with buffers
|
||||||
|
AK_FORCE_INLINE const uint8_t *getBuffer(const bool usesAdditionalBuffer) const {
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
return &mAdditionalBuffer[0];
|
||||||
|
} else {
|
||||||
|
return mOriginalBuffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getOriginalBufferSize() const {
|
||||||
|
return mOriginalBufferSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);
|
||||||
|
|
||||||
|
static const size_t INITIAL_ADDITIONAL_BUFFER_SIZE;
|
||||||
|
static const size_t MAX_ADDITIONAL_BUFFER_SIZE;
|
||||||
|
static const size_t EXTEND_ADDITIONAL_BUFFER_SIZE_STEP;
|
||||||
|
|
||||||
|
uint8_t *const mOriginalBuffer;
|
||||||
|
const int mOriginalBufferSize;
|
||||||
|
std::vector<uint8_t> mAdditionalBuffer;
|
||||||
|
int mUsedAdditionalBufferSize;
|
||||||
|
|
||||||
// Return if the buffer is successfully extended or not.
|
// Return if the buffer is successfully extended or not.
|
||||||
AK_FORCE_INLINE bool extendBuffer() {
|
AK_FORCE_INLINE bool extendBuffer() {
|
||||||
if (mBuffer.size() + EXTEND_BUFFER_SIZE_STEP > MAX_BUFFER_SIZE) {
|
if (mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP
|
||||||
|
> MAX_ADDITIONAL_BUFFER_SIZE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
mBuffer.resize(mBuffer.size() + EXTEND_BUFFER_SIZE_STEP);
|
mAdditionalBuffer.resize(mAdditionalBuffer.size() + EXTEND_ADDITIONAL_BUFFER_SIZE_STEP);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getAllocatedSize() const {
|
|
||||||
return mBuffer.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getUsedSize() const {
|
|
||||||
return mUsedSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE void clear() {
|
|
||||||
mUsedSize = 0;
|
|
||||||
mBuffer.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(ExtendableBuffer);
|
|
||||||
|
|
||||||
static const size_t INITIAL_BUFFER_SIZE;
|
|
||||||
static const size_t MAX_BUFFER_SIZE;
|
|
||||||
static const size_t EXTEND_BUFFER_SIZE_STEP;
|
|
||||||
|
|
||||||
std::vector<uint8_t> mBuffer;
|
|
||||||
int mUsedSize;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif /* LATINIME_MMAPED_BUFFER_H */
|
#endif /* LATINIME_BUFFER_WITH_EXTENDABLE_BUFFER_H */
|
||||||
|
|
Loading…
Reference in New Issue