Merge "Employ "bigram link" for handling moved bigram target."
This commit is contained in:
commit
04bf3cd4e0
8 changed files with 128 additions and 72 deletions
|
@ -18,6 +18,42 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
|
||||||
|
|
||||||
|
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
|
bool *const outHasNext, int *const pos) const {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
const BigramListReadWriteUtils::BigramFlags flags =
|
||||||
|
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||||
|
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
|
buffer, flags, pos);
|
||||||
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
|
||||||
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||||
|
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||||
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*pos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
|
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
|
@ -28,15 +64,16 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
|
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
|
||||||
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
buffer, flags, fromPos);
|
buffer, flags, fromPos);
|
||||||
if (bigramPos == NOT_A_VALID_WORD_POS) {
|
if (originalBigramPos == NOT_A_VALID_WORD_POS) {
|
||||||
// skip invalid bigram entry.
|
// skip invalid bigram entry.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
bigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||||
uint32_t newBigramOffset;
|
uint32_t newBigramOffset;
|
||||||
int newBigramOffsetFieldSize;
|
int newBigramOffsetFieldSize;
|
||||||
|
@ -133,11 +170,12 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
|
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
int bigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||||
buffer, flags, &pos);
|
buffer, flags, &pos);
|
||||||
if (usesAdditionalBuffer && bigramPos != NOT_A_VALID_WORD_POS) {
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_VALID_WORD_POS) {
|
||||||
bigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
if (bigramPos != targetBigramPos) {
|
if (bigramPos != targetBigramPos) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -152,4 +190,26 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int ta
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
|
const int originalBigramPos) const {
|
||||||
|
if (originalBigramPos == NOT_A_VALID_WORD_POS) {
|
||||||
|
return NOT_A_VALID_WORD_POS;
|
||||||
|
}
|
||||||
|
int currentPos = originalBigramPos;
|
||||||
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||||
|
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
||||||
|
int bigramLinkCount = 0;
|
||||||
|
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
|
||||||
|
currentPos = nodeReader.getBigramLinkedNodePos();
|
||||||
|
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
||||||
|
bigramLinkCount++;
|
||||||
|
if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
|
||||||
|
AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
|
||||||
|
ASSERT(false);
|
||||||
|
return NOT_A_VALID_WORD_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return currentPos;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -21,7 +21,9 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -31,43 +33,16 @@ namespace latinime {
|
||||||
*/
|
*/
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer)
|
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
|
||||||
: mBuffer(buffer) {}
|
const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
|
||||||
|
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const;
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const BigramListReadWriteUtils::BigramFlags flags =
|
|
||||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
|
||||||
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
|
||||||
buffer, flags, pos);
|
|
||||||
if (usesAdditionalBuffer && *outBigramPos != NOT_A_VALID_WORD_POS) {
|
|
||||||
*outBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void skipAllBigrams(int *const pos) const {
|
void skipAllBigrams(int *const pos) const;
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
||||||
// positions after bigram lists. This method skips invalid bigram entries.
|
// positions after bigram lists. This method skips invalid bigram entries.
|
||||||
|
@ -81,7 +56,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||||
|
|
||||||
|
static const int BIGRAM_LINK_COUNT_LIMIT;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
||||||
|
|
||||||
|
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
||||||
|
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||||
|
|
|
@ -62,6 +62,11 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
||||||
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
||||||
mChildrenPos += mBuffer->getOriginalBufferSize();
|
mChildrenPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
if (mSiblingPos == NOT_A_VALID_WORD_POS && DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
||||||
|
mBigramLinkedNodePos = mChildrenPos;
|
||||||
|
} else {
|
||||||
|
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos += mBuffer->getOriginalBufferSize();
|
pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,11 +39,11 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
||||||
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS),
|
mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_VALID_WORD_POS), mFlags(0),
|
||||||
mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
|
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
|
||||||
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
||||||
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
|
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
||||||
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
||||||
mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieNodeReader() {}
|
~DynamicPatriciaTrieNodeReader() {}
|
||||||
|
@ -56,13 +56,9 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
|
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
|
||||||
const int maxCodePointCount, int *const outCodePoints) {
|
const int maxCodePointCount, int *const outCodePoints) {
|
||||||
mNodePos = nodePos;
|
|
||||||
mSiblingPos = NOT_A_VALID_WORD_POS;
|
mSiblingPos = NOT_A_VALID_WORD_POS;
|
||||||
fetchNodeInfoFromBufferAndProcessMovedNode(mNodePos, maxCodePointCount, outCodePoints);
|
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
||||||
}
|
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
|
||||||
|
|
||||||
AK_FORCE_INLINE int getNodePos() const {
|
|
||||||
return mNodePos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
||||||
|
@ -119,6 +115,11 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
return mChildrenPos;
|
return mChildrenPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bigram linked node position.
|
||||||
|
AK_FORCE_INLINE int getBigramLinkedNodePos() const {
|
||||||
|
return mBigramLinkedNodePos;
|
||||||
|
}
|
||||||
|
|
||||||
// Shortcutlist position
|
// Shortcutlist position
|
||||||
AK_FORCE_INLINE int getShortcutPos() const {
|
AK_FORCE_INLINE int getShortcutPos() const {
|
||||||
return mShortcutPos;
|
return mShortcutPos;
|
||||||
|
@ -140,7 +141,6 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
||||||
int mNodePos;
|
|
||||||
int mHeadPos;
|
int mHeadPos;
|
||||||
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
||||||
int mParentPos;
|
int mParentPos;
|
||||||
|
@ -149,6 +149,7 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mChildrenPosFieldPos;
|
int mChildrenPosFieldPos;
|
||||||
int mChildrenPos;
|
int mChildrenPos;
|
||||||
|
int mBigramLinkedNodePos;
|
||||||
int mShortcutPos;
|
int mShortcutPos;
|
||||||
int mBigramPos;
|
int mBigramPos;
|
||||||
int mSiblingPos;
|
int mSiblingPos;
|
||||||
|
|
|
@ -38,7 +38,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
|
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
||||||
nodeReader->getChildrenPos(), nodeReader->getProbability(),
|
nodeReader->getChildrenPos(), nodeReader->getProbability(),
|
||||||
nodeReader->isTerminal() && !nodeReader->isDeleted(),
|
nodeReader->isTerminal() && !nodeReader->isDeleted(),
|
||||||
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
|
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
|
||||||
|
@ -122,7 +122,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (length == readingHelper.getTotalCodePointCount()) {
|
if (length == readingHelper.getTotalCodePointCount()) {
|
||||||
// Terminal position is found.
|
// Terminal position is found.
|
||||||
return nodeReader->getNodePos();
|
return nodeReader->getHeadPos();
|
||||||
}
|
}
|
||||||
if (!nodeReader->hasChildren()) {
|
if (!nodeReader->hasChildren()) {
|
||||||
return NOT_A_VALID_WORD_POS;
|
return NOT_A_VALID_WORD_POS;
|
||||||
|
|
|
@ -36,8 +36,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
|
: mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()),
|
||||||
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer) {}
|
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
|
||||||
|
|
||||||
~DynamicPatriciaTriePolicy() {
|
~DynamicPatriciaTriePolicy() {
|
||||||
delete mBuffer;
|
delete mBuffer;
|
||||||
|
@ -91,8 +91,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
|
||||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -63,7 +63,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
codePointCount - readingHelper->getTotalCodePointCount());
|
codePointCount - readingHelper->getTotalCodePointCount());
|
||||||
}
|
}
|
||||||
// Advance to the children nodes.
|
// Advance to the children nodes.
|
||||||
parentPos = nodeReader->getNodePos();
|
parentPos = nodeReader->getHeadPos();
|
||||||
readingHelper->readChildNode();
|
readingHelper->readChildNode();
|
||||||
}
|
}
|
||||||
if (readingHelper->isError()) {
|
if (readingHelper->isError()) {
|
||||||
|
@ -100,8 +100,9 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
|
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
|
||||||
int pos = originalNode->getNodePos();
|
const int bigramLinkedNodePos) {
|
||||||
|
int pos = originalNode->getHeadPos();
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
|
@ -113,18 +114,24 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||||
false /* isDeleted */);
|
false /* isDeleted */);
|
||||||
int writingPos = originalNode->getNodePos();
|
int writingPos = originalNode->getHeadPos();
|
||||||
// Update flags.
|
// Update flags.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Update moved position, which is stored in the parent offset field.
|
// Update moved position, which is stored in the parent offset field.
|
||||||
const int movedPosOffset = movedPos - originalNode->getNodePos();
|
const int movedPosOffset = movedPos - originalNode->getHeadPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
|
||||||
mBuffer, movedPosOffset, &writingPos)) {
|
mBuffer, movedPosOffset, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Update bigram linked node position, which is stored in the children position field.
|
||||||
|
int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
|
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (originalNode->hasChildren()) {
|
if (originalNode->hasChildren()) {
|
||||||
// Update children's parent position.
|
// Update children's parent position.
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
|
@ -248,7 +255,7 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
} else {
|
} else {
|
||||||
// Make the node terminal and write the probability.
|
// Make the node terminal and write the probability.
|
||||||
int movedPos = mBuffer->getTailPosition();
|
int movedPos = mBuffer->getTailPosition();
|
||||||
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
|
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
|
||||||
|
@ -268,7 +275,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
newPtNodeArrayPos, &childrenPosFieldPos)) {
|
newPtNodeArrayPos, &childrenPosFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentNode->getNodePos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
|
||||||
codePointCount, probability);
|
codePointCount, probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -305,11 +312,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Reallocating PtNode: abcde, newNode: abc.
|
// Reallocating PtNode: abcde, newNode: abc.
|
||||||
// abc (1st, terminal) __ de (2nd)
|
// abc (1st, terminal) __ de (2nd)
|
||||||
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
||||||
const int firstPtNodePos = mBuffer->getTailPosition();
|
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
|
||||||
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) {
|
int writingPos = firstPartOfReallocatedPtNodePos;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int writingPos = firstPtNodePos;
|
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
// with actual children position.
|
// with actual children position.
|
||||||
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
||||||
|
@ -325,15 +329,15 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write the 2nd part of the reallocating node.
|
// Write the 2nd part of the reallocating node.
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode,
|
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||||
reallocatingPtNode->getNodePos(),
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
|
||||||
reallocatingPtNodeCodePoints + overlappingCodePointCount,
|
reallocatingPtNodeCodePoints + overlappingCodePointCount,
|
||||||
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
|
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
|
||||||
reallocatingPtNode->getProbability(), &writingPos)) {
|
reallocatingPtNode->getProbability(), &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(),
|
if (!writePtNodeToBuffer(firstPartOfReallocatedPtNodePos,
|
||||||
newNodeCodePoints + overlappingCodePointCount,
|
newNodeCodePoints + overlappingCodePointCount,
|
||||||
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
|
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
@ -344,9 +348,14 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// Update original reallocatingPtNode as moved.
|
||||||
|
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
|
||||||
|
secondPartOfReallocatedPtNodePos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// Load node info. Information of the 1st part will be fetched.
|
// Load node info. Information of the 1st part will be fetched.
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos);
|
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
|
||||||
// Update children position.
|
// Update children position.
|
||||||
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
|
|
|
@ -54,7 +54,7 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
||||||
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
||||||
const int movedPos);
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
|
||||||
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
|
bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
|
||||||
const int parentPos, const int *const codePoints, const int codePointCount,
|
const int parentPos, const int *const codePoints, const int codePointCount,
|
||||||
|
|
Loading…
Reference in a new issue