Merge "GC step 2. Finding garbage bigram entries."

main
Keisuke Kuroyanagi 2013-09-20 11:11:55 +00:00 committed by Android (Google) Code Review
commit b71f63bc1c
19 changed files with 383 additions and 263 deletions

View File

@ -147,7 +147,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch); forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS; if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos); return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
} }
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1, int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,

View File

@ -68,7 +68,7 @@ class MultiBigramMap {
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nodePos) { const int nodePos) {
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos); bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
@ -112,7 +112,7 @@ class MultiBigramMap {
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) { const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY; int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos); const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos); bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {

View File

@ -52,9 +52,9 @@ class DictionaryStructureWithBufferPolicy {
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0; virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const int nodePos) const = 0; virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfNode(const int nodePos) const = 0; virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;

View File

@ -223,7 +223,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt( BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(), traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy() traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfNode(terminalDicNode->getPos())); ->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions. // Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);

View File

@ -33,10 +33,9 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const { int *const pos) const {
const BigramListReadWriteUtils::BigramFlags flags = BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos); BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( outBigramPos, pos);
mBigramsBuf, flags, pos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags); *outHasNext = BigramListReadWriteUtils::hasNext(flags);
} }

View File

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime { namespace latinime {
@ -38,23 +39,31 @@ const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ BigramListReadWriteUtils::BigramFlags /* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
int *const pos) { int *const outTargetPtNodePos, int *const bigramEntryPos) {
return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos); const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
bigramEntryPos);
if (outBigramFlags) {
*outBigramFlags = bigramFlags;
}
const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
bigramEntryPos);
if (outTargetPtNodePos) {
*outTargetPtNodePos = targetPos;
}
} }
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, /* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
int *const pos) { int *const bigramListPos) {
BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos); BigramFlags flags;
while (hasNext(flags)) { do {
*pos += attributeAddressSize(flags); getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
flags = getFlagsAndForwardPointer(bigramsBuf, pos); bigramListPos);
} } while(hasNext(flags));
*pos += attributeAddressSize(flags);
} }
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer( /* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) { const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
int offset = 0; int offset = 0;
const int origin = *pos; const int origin = *pos;
@ -79,4 +88,59 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
} }
} }
// Builds the flags for a bigram entry that starts at *writingPos and then writes the entry
// (flags followed by the target field) into buffer through writeBigramEntry(), which advances
// *writingPos. Returns false when the flags cannot be created or when writing fails; nothing
// is written if flag creation fails.
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
        BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
        const bool hasNext, int *const writingPos) {
    BigramFlags entryFlags;
    const bool flagsAreValid =
            createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &entryFlags);
    // Short-circuit keeps the write from happening when the flags are not valid.
    return flagsAreValid && writeBigramEntry(buffer, entryFlags, targetPos, writingPos);
}
// Writes one bigram entry at *writingPos: a 1-byte flags field followed by the target field,
// and advances *writingPos through the buffer writing calls.
//
// bufferToWrite:   buffer that receives the entry.
// flags:           written as given. The width of the target field is taken from these flags
//                  via attributeAddressSize(); only the magnitude of the offset is stored in
//                  the target field, so the caller is responsible for the sign bit in flags.
// targetPtNodePos: position of the target PtNode, or NOT_A_DICT_POS to store an offset of 0.
// writingPos:      in/out position of the entry.
//
// Returns false, without writing anything, when the offset magnitude cannot be represented in
// the target field described by flags. Returns false as well when a buffer write fails.
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
        BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
        const int targetPtNodePos, int *const writingPos) {
    // The offset is relative to the target field, which directly follows the 1-byte flags
    // field. This matches how createAndGetBigramFlags() computes the offset.
    const int targetFieldPos = *writingPos + 1;
    const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - targetFieldPos : 0;
    const uint32_t absOffset = abs(offset);
    const int bigramTargetFieldSize = attributeAddressSize(flags);
    if (bigramTargetFieldSize < 0) {
        return false;
    }
    // Validate before touching the buffer so that an entry is never left half written with an
    // offset whose upper bytes were dropped. The size guard avoids shifting by the full width
    // of the type.
    if (bigramTargetFieldSize < static_cast<int>(sizeof(absOffset))
            && (absOffset >> (bigramTargetFieldSize * 8)) != 0) {
        return false;
    }
    if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
        return false;
    }
    return bufferToWrite->writeUintAndAdvancePosition(absOffset, bigramTargetFieldSize,
            writingPos);
}
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
const int targetPos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int targetFieldPos = entryPos + 1;
const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outBigramFlags = flags;
return true;
}
} // namespace latinime } // namespace latinime

View File

@ -24,11 +24,15 @@
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer;
class BigramListReadWriteUtils { class BigramListReadWriteUtils {
public: public:
typedef uint8_t BigramFlags; typedef uint8_t BigramFlags;
static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos); static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
int *const bigramEntryPos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY; return flags & MASK_ATTRIBUTE_PROBABILITY;
@ -39,10 +43,7 @@ public:
} }
// Bigrams reading methods // Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos); static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
// Returns the size of the bigram position field that is stored in bigram flags. // Returns the size of the bigram position field that is stored in bigram flags.
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) { static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
@ -67,48 +68,11 @@ public:
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY); return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
} }
// Returns true if the bigram entry is valid and put entry values into out*. static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize( const int targetPos, const int probability, const bool hasNext, int *const writingPos);
const int entryPos, const int targetPos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
int *const outOffsetFieldSize) {
if (targetPos == NOT_A_DICT_POS) {
return false;
}
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int targetFieldPos = entryPos + 1;
const int offset = targetPos - targetFieldPos;
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outOffsetFieldSize = 3;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
*outOffsetFieldSize = 2;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
*outOffsetFieldSize = 1;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
// writing. const int targetOffset, int *const writingPos);
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outOffsetFieldSize = 3;
*outBigramFlags = flags;
*outOffset = absOffest;
return true;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@ -122,9 +86,16 @@ private:
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY; static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT; static const int ATTRIBUTE_ADDRESS_SHIFT;
// Returns true if the bigram entry is valid and put entry flags into out*.
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) { static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
} }
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H #endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H

View File

@ -16,41 +16,47 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime { namespace latinime {
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000; const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const { bool *const outHasNext, int *const bigramEntryPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize(); *bigramEntryPos -= mBuffer->getOriginalBufferSize();
} }
const BigramListReadWriteUtils::BigramFlags flags = BigramListReadWriteUtils::BigramFlags bigramFlags;
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); int originalBigramPos;
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
buffer, flags, pos); &originalBigramPos, bigramEntryPos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize(); originalBigramPos += mBuffer->getOriginalBufferSize();
} }
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize(); *bigramEntryPos += mBuffer->getOriginalBufferSize();
} }
} }
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const { void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize(); *bigramListPos -= mBuffer->getOriginalBufferSize();
} }
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos); BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize(); *bigramListPos += mBuffer->getOriginalBufferSize();
} }
} }
@ -61,13 +67,19 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
*fromPos -= mBuffer->getOriginalBufferSize(); *fromPos -= mBuffer->getOriginalBufferSize();
} }
*outBigramsCount = 0; *outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags; BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do { do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
// The buffer address can be changed after calling buffer writing methods. // The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); int originalBigramPos;
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos); BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
buffer, flags, fromPos); fromPos);
if (originalBigramPos == NOT_A_DICT_POS) { if (originalBigramPos == NOT_A_DICT_POS) {
// skip invalid bigram entry. // skip invalid bigram entry.
continue; continue;
@ -76,132 +88,163 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
originalBigramPos += mBuffer->getOriginalBufferSize(); originalBigramPos += mBuffer->getOriginalBufferSize();
} }
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
BigramListReadWriteUtils::BigramFlags newBigramFlags; if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
uint32_t newBigramOffset; BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
int newBigramOffsetFieldSize; BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
*toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
&newBigramOffsetFieldSize)) {
continue;
}
// Write bigram entry. Target buffer is always the additional buffer.
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
toPos)) {
return false; return false;
} }
(*outBigramsCount)++; (*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags)); } while(BigramListReadWriteUtils::hasNext(bigramFlags));
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize(); *fromPos += mBuffer->getOriginalBufferSize();
} }
return true; return true;
} }
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos, // Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
const int probability, int *const pos) { // has been deleted or is not a valid terminal.
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos); bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize(); *bigramListPos -= mBuffer->getOriginalBufferSize();
} }
BigramListReadWriteUtils::BigramFlags flags; DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do { do {
int entryPos = *pos; if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int bigramEntryPos = *bigramListPos;
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
bigramListPos);
if (usesAdditionalBuffer) {
bigramEntryPos += mBuffer->getOriginalBufferSize();
}
if (originalBigramPos == NOT_A_DICT_POS) {
// This entry has already been removed.
continue;
}
if (usesAdditionalBuffer) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
// TODO: Update probability for supporting probability decaying.
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
return false;
}
}
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
return true;
}
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
const int probability, int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int entryPos = *bigramListPos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
entryPos += mBuffer->getOriginalBufferSize(); entryPos += mBuffer->getOriginalBufferSize();
} }
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods. // The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos); mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer( bigramListPos);
buffer, flags, pos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize(); originalBigramPos += mBuffer->getOriginalBufferSize();
} }
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) { if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
// Update this bigram entry. // Update this bigram entry.
const BigramListReadWriteUtils::BigramFlags updatedFlags = const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setProbabilityInFlags(flags, probability); BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos); return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
originalBigramPos, &entryPos);
} }
if (BigramListReadWriteUtils::hasNext(flags)) { if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
continue; continue;
} }
// The current last entry is found. // The current last entry is found.
// First, update the flags of the last entry. // First, update the flags of the last entry.
const BigramListReadWriteUtils::BigramFlags updatedFlags = const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setHasNextFlag(flags); BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) { if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
&entryPos)) {
return false; return false;
} }
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize(); *bigramListPos += mBuffer->getOriginalBufferSize();
} }
// Then, add a new entry after the last entry. // Then, add a new entry after the last entry.
return writeNewBigramEntry(bigramPos, probability, pos); return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
} while(BigramListReadWriteUtils::hasNext(flags)); } while(BigramListReadWriteUtils::hasNext(bigramFlags));
// We return directly from the while loop. // We return directly from the while loop.
ASSERT(false); ASSERT(false);
return false; return false;
} }
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability, bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos) { int *const writingPos) {
BigramListReadWriteUtils::BigramFlags newBigramFlags; // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
uint32_t newBigramOffset; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
int newBigramOffsetFieldSize; probability, false /* hasNext */, writingPos);
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
*writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
&newBigramOffset, &newBigramOffsetFieldSize)) {
return false;
}
// Write bigram flags.
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
return false;
}
// Write bigram positon offset.
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
writingPos)) {
return false;
}
return true;
} }
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) { bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
int pos = bigramListPos; int pos = bigramListPos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize(); pos -= mBuffer->getOriginalBufferSize();
} }
BigramListReadWriteUtils::BigramFlags flags; BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do { do {
// The buffer address can be changed after calling buffer writing methods. if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos); ASSERT(false);
int bigramOffsetFieldPos = pos; return false;
if (usesAdditionalBuffer) { }
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize(); int bigramEntryPos = pos;
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
if (usesAdditionalBuffer) {
bigramEntryPos += mBuffer->getOriginalBufferSize();
} }
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, &pos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize(); originalBigramPos += mBuffer->getOriginalBufferSize();
} }
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
if (bigramPos != targetBigramPos) { if (bigramPos != bigramTargetPos) {
continue; continue;
} }
// Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid. // Target entry is found. Write an invalid target position to mark the bigram invalid.
const int bigramOffsetFieldSize = BigramListReadWriteUtils::attributeAddressSize(flags); return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize, NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
&bigramOffsetFieldPos)) { } while(BigramListReadWriteUtils::hasNext(bigramFlags));
return false;
}
return true;
} while(BigramListReadWriteUtils::hasNext(flags));
return false; return false;
} }
@ -212,14 +255,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
} }
int currentPos = originalBigramPos; int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(currentPos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
int bigramLinkCount = 0; int bigramLinkCount = 0;
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
currentPos = nodeReader.getBigramLinkedNodePos(); currentPos = nodeReader.getBigramLinkedNodePos();
nodeReader.fetchNodeInfoFromBuffer(currentPos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
bigramLinkCount++; bigramLinkCount++;
if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) { if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
AKLOGI("Bigram link is invalid. start position: %d", bigramPos); AKLOGE("Bigram link is invalid. start position: %d", bigramPos);
ASSERT(false); ASSERT(false);
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }

View File

@ -21,13 +21,12 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime { namespace latinime {
class BufferWithExtendableBuffer;
class DictionaryShortcutsStructurePolicy;
/* /*
* This is a dynamic version of BigramListPolicy and supports an additional buffer. * This is a dynamic version of BigramListPolicy and supports an additional buffer.
*/ */
@ -40,9 +39,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
~DynamicBigramListPolicy() {} ~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const; int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const; void skipAllBigrams(int *const bigramListPos) const;
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in // Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
// bufferToWrite and advance these positions after bigram lists. This method skips invalid // bufferToWrite and advance these positions after bigram lists. This method skips invalid
@ -50,18 +49,22 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos, bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
int *const toPos, int *const outBigramsCount) const; int *const toPos, int *const outBigramsCount) const;
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos); bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
bool writeNewBigramEntry(const int bigramPos, const int probability, bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
int *const bigramListPos);
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos); int *const writingPos);
// Return if targetBigramPos is found or not. // Return if targetBigramPos is found or not.
bool removeBigram(const int bigramListPos, const int targetBigramPos); bool removeBigram(const int bigramListPos, const int bigramTargetPos);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
static const int BIGRAM_LINK_COUNT_LIMIT; static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;

View File

@ -20,6 +20,7 @@
#include <vector> #include <vector>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
@ -68,6 +69,36 @@ class DynamicPatriciaTrieGcEventListeners {
int mChildrenValue; int mChildrenValue;
}; };
// GC traversal listener that refreshes the bigram list of every PtNode that is not marked as
// deleted. PtNodes without a bigram list are skipped. Useless bigram entries are removed by the
// bigram policy while it updates each list.
class ListenerForUpdatingBigramProbability
        : public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
 public:
    ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
            : mBigramPolicy(bigramPolicy) {}

    // Moving up or down in the trie requires no work for this listener.
    bool onAscend() { return true; }

    bool onDescend() { return true; }

    // Returns false only when the bigram policy fails to update the visited PtNode's bigram
    // list; deleted PtNodes and PtNodes without bigrams are reported as success.
    bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
        if (node->isDeleted()) {
            return true;
        }
        int bigramListPos = node->getBigramsPos();
        if (bigramListPos == NOT_A_DICT_POS) {
            return true;
        }
        return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&bigramListPos);
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);

    DynamicBigramListPolicy *const mBigramPolicy;
};
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
}; };

View File

@ -23,26 +23,26 @@
namespace latinime { namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int maxCodePointCount, int *const outCodePoints) { const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) { if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d", AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
nodePos, mBuffer->getTailPosition()); ptNodePos, mBuffer->getTailPosition());
ASSERT(false); ASSERT(false);
invalidatePtNodeInfo(); invalidatePtNodeInfo();
return; return;
} }
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = nodePos; int pos = ptNodePos;
mHeadPos = nodePos; mHeadPos = ptNodePos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize(); pos -= mBuffer->getOriginalBufferSize();
} }
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos = const int parentPos =
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS; mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) { if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
@ -99,7 +99,8 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
// Read destination node if the read node is a moved node. // Read destination node if the read node is a moved node.
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
// The destination position is stored at the same place as the parent position. // The destination position is stored at the same place as the parent position.
fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints); fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
outCodePoints);
} }
} }

View File

@ -48,17 +48,17 @@ class DynamicPatriciaTrieNodeReader {
~DynamicPatriciaTrieNodeReader() {} ~DynamicPatriciaTrieNodeReader() {}
// Reads node information from dictionary buffer and updates members with the information. // Reads PtNode information from dictionary buffer and updates members with the information.
AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) { AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */, fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
0 /* outCodePoints */); 0 /* maxCodePointCount */, 0 /* outCodePoints */);
} }
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos, AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
const int maxCodePointCount, int *const outCodePoints) { const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
mSiblingPos = NOT_A_DICT_POS; mSiblingPos = NOT_A_DICT_POS;
mBigramLinkedNodePos = NOT_A_DICT_POS; mBigramLinkedNodePos = NOT_A_DICT_POS;
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints); fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
} }
// HeadPos is different from NodePos when the current PtNode is a moved PtNode. // HeadPos is different from NodePos when the current PtNode is a moved PtNode.
@ -154,8 +154,8 @@ class DynamicPatriciaTrieNodeReader {
int mBigramPos; int mBigramPos;
int mSiblingPos; int mSiblingPos;
void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
int *const outCodePoints); const int maxCodePointCount, int *const outCodePoints);
void invalidatePtNodeInfo(); void invalidatePtNodeInfo();
}; };

View File

@ -35,7 +35,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos()); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
@ -48,7 +48,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
} }
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints, const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
// This method traverses parent nodes from the terminal by following parent pointers; thus, // This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order. // node code points are stored in the buffer in the reverse order.
@ -56,9 +56,9 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read the terminal node and get its probability. // First, read the terminal node and get its probability.
readingHelper.initWithNodePos(nodePos); readingHelper.initWithPtNodePos(ptNodePos);
if (!readingHelper.isValidTerminalNode()) { if (!readingHelper.isValidTerminalNode()) {
// Node at the nodePos is not a valid terminal node. // Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY; *outUnigramProbability = NOT_A_PROBABILITY;
return 0; return 0;
} }
@ -67,7 +67,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// Then, following parent node link to the dictionary root and fetch node code points. // Then, following parent node link to the dictionary root and fetch node code points.
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
// The nodePos is not a valid terminal node position in the dictionary. // The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY; *outUnigramProbability = NOT_A_PROBABILITY;
return 0; return 0;
} }
@ -98,7 +98,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
@ -148,39 +148,39 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
} }
} }
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
} }
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
return nodeReader.getShortcutPos(); return nodeReader.getShortcutPos();
} }
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
@ -195,7 +195,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy); &mBigramListPolicy, &mShortcutListPolicy);
return writingHelper.addUnigramWord(&readingHelper, word, length, probability); return writingHelper.addUnigramWord(&readingHelper, word, length, probability);

View File

@ -51,7 +51,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DicNodeVector *const childDicNodes) const; DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount( int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const; int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(const int *const inWord, int getTerminalNodePositionOfWord(const int *const inWord,
@ -59,11 +59,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbability(const int unigramProbability, const int bigramProbability) const;
int getUnigramProbabilityOfPtNode(const int nodePos) const; int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
int getShortcutPositionOfNode(const int nodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfNode(const int nodePos) const; int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy; return &mHeaderPolicy;

View File

@ -72,7 +72,7 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this // Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop. // method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextNodeArray() { void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos; mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
@ -123,7 +123,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link. // Follow the forward link.
mReadingState.mPos += forwardLinkPosition; mReadingState.mPos += forwardLinkPosition;
nextNodeArray(); nextPtNodeArray();
} else { } else {
// All node arrays have been read. // All node arrays have been read.
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;

View File

@ -73,32 +73,32 @@ class DynamicPatriciaTrieReadingHelper {
return mReadingState.mPos == NOT_A_DICT_POS; return mReadingState.mPos == NOT_A_DICT_POS;
} }
// Initialize reading state with the head position of a node array. // Initialize reading state with the head position of a PtNode array.
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) { AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
if (nodeArrayPos == NOT_A_DICT_POS) { if (ptNodeArrayPos == NOT_A_DICT_POS) {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} else { } else {
mIsError = false; mIsError = false;
mReadingState.mPos = nodeArrayPos; mReadingState.mPos = ptNodeArrayPos;
mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 0; mReadingState.mTotalNodeCount = 0;
mReadingState.mNodeArrayCount = 0; mReadingState.mNodeArrayCount = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
nextNodeArray(); nextPtNodeArray();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchPtNodeInfo();
} }
} }
} }
// Initialize reading state with the head position of a node. // Initialize reading state with the head position of a node.
AK_FORCE_INLINE void initWithNodePos(const int nodePos) { AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} else { } else {
mIsError = false; mIsError = false;
mReadingState.mPos = nodePos; mReadingState.mPos = ptNodePos;
mReadingState.mNodeCount = 1; mReadingState.mNodeCount = 1;
mReadingState.mPrevTotalCodePointCount = 0; mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 1; mReadingState.mTotalNodeCount = 1;
@ -106,7 +106,7 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
fetchNodeInfo(); fetchPtNodeInfo();
} }
} }
@ -151,10 +151,10 @@ class DynamicPatriciaTrieReadingHelper {
// All nodes in the current node array have been read. // All nodes in the current node array have been read.
followForwardLink(); followForwardLink();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchPtNodeInfo();
} }
} else { } else {
fetchNodeInfo(); fetchPtNodeInfo();
} }
} }
@ -167,9 +167,9 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPos = mNodeReader.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array. // Read children node array.
nextNodeArray(); nextPtNodeArray();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchPtNodeInfo();
} }
} else { } else {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
@ -186,7 +186,7 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPos = mNodeReader.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS; mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
fetchNodeInfo(); fetchPtNodeInfo();
} else { } else {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
@ -202,7 +202,7 @@ class DynamicPatriciaTrieReadingHelper {
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() { AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchPtNodeInfo();
} }
} }
@ -240,12 +240,12 @@ class DynamicPatriciaTrieReadingHelper {
int mMergedNodeCodePoints[MAX_WORD_LENGTH]; int mMergedNodeCodePoints[MAX_WORD_LENGTH];
std::vector<ReadingState> mReadingStateStack; std::vector<ReadingState> mReadingStateStack;
void nextNodeArray(); void nextPtNodeArray();
void followForwardLink(); void followForwardLink();
AK_FORCE_INLINE void fetchNodeInfo() { AK_FORCE_INLINE void fetchPtNodeInfo() {
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos, mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
MAX_WORD_LENGTH, mMergedNodeCodePoints); MAX_WORD_LENGTH, mMergedNodeCodePoints);
if (mNodeReader.getCodePointCount() <= 0) { if (mNodeReader.getCodePointCount() <= 0) {
// Empty node is not allowed. // Empty node is not allowed.
@ -271,7 +271,7 @@ class DynamicPatriciaTrieReadingHelper {
} else { } else {
mReadingState = mReadingStateStack.back(); mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back(); mReadingStateStack.pop_back();
fetchNodeInfo(); fetchPtNodeInfo();
} }
} }
}; };

View File

@ -90,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
const int probability) { const int probability) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH]; int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
mMergedNodeCodePoints); mMergedNodeCodePoints);
// Move node to add bigram entry. // Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition(); const int newNodePos = mBuffer->getTailPosition();
@ -104,7 +104,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
&writingPos)) { &writingPos)) {
return false; return false;
} }
nodeReader.fetchNodeInfoFromBuffer(newNodePos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list. // Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos(); int bigramListPos = nodeReader.getBigramsPos();
@ -131,7 +131,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
// Remove a bigram relation from word0Pos to word1Pos. // Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false; return false;
} }
@ -217,7 +217,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
// Update children's parent position. // Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos()); readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
const int childPtNodeWrittenPos = nodeReader->getHeadPos(); const int childPtNodeWrittenPos = nodeReader->getHeadPos();
const int parentOffset = movedPos - childPtNodeWrittenPos; const int parentOffset = movedPos - childPtNodeWrittenPos;
@ -452,7 +452,7 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
// Load node info. Information of the 1st part will be fetched. // Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos); nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
// Update children position. // Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
@ -519,7 +519,7 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
BufferWithExtendableBuffer *const bufferToWrite) { BufferWithExtendableBuffer *const bufferToWrite) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
@ -528,6 +528,14 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) { &listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
return false; return false;
} }
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
listenerForupdatingBigramProbability(mBigramPolicy);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&listenerForupdatingBigramProbability)) {
return false;
}
// TODO: Implement. // TODO: Implement.
return false; return false;
} }

View File

@ -49,7 +49,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller // with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node). // than the position we look for, and we have to descend the z node).
/* Parameters : /* Parameters :
* nodePos: the byte position of the terminal PtNode of the word we are searching for (this is * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram) * what is stored as the "bigram position" in each bigram)
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
* outUnigramProbability: a pointer to an int to write the probability into. * outUnigramProbability: a pointer to an int to write the probability into.
@ -57,7 +57,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
*/ */
// TODO: Split this function to be more readable // TODO: Split this function to be more readable
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints, const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
int pos = getRootPosition(); int pos = getRootPosition();
int wordPos = 0; int wordPos = 0;
@ -78,7 +78,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
mDictRoot, &pos); mDictRoot, &pos);
if (nodePos == startPos) { if (ptNodePos == startPos) {
// We found the position. Copy the rest of the code points in the buffer and return // We found the position. Copy the rest of the code points in the buffer and return
// the length. // the length.
outCodePoints[wordPos] = character; outCodePoints[wordPos] = character;
@ -121,7 +121,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// Here comes the tricky part. First, read the children position. // Here comes the tricky part. First, read the children position.
const int childrenPos = PatriciaTrieReadingUtils const int childrenPos = PatriciaTrieReadingUtils
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos); ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
if (childrenPos > nodePos) { if (childrenPos > ptNodePos) {
// If the children pos is greater than the position, it means the previous // If the children pos is greater than the position, it means the previous
// PtNode, which position is stored in lastCandidatePtNodePos, was the right // PtNode, which position is stored in lastCandidatePtNodePos, was the right
// one. // one.
@ -213,7 +213,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
} }
} }
// If we have looked through all the PtNodes and found no match, the nodePos is // If we have looked through all the PtNodes and found no match, the ptNodePos is
// not the position of a terminal in this dictionary. // not the position of a terminal in this dictionary.
return 0; return 0;
} }
@ -319,11 +319,11 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
} }
} }
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const { int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
int pos = nodePos; int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) { if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
@ -341,11 +341,11 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
mDictRoot, &pos), NOT_A_PROBABILITY); mDictRoot, &pos), NOT_A_PROBABILITY);
} }
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const { int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
int pos = nodePos; int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
@ -361,11 +361,11 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
return pos; return pos;
} }
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const { int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (nodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
int pos = nodePos; int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
@ -385,8 +385,8 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
} }
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int nodePos, DicNodeVector *childDicNodes) const { const int ptNodePos, DicNodeVector *childDicNodes) const {
int pos = nodePos; int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
@ -404,7 +404,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
if (PatriciaTrieReadingUtils::hasBigrams(flags)) { if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
getBigramsStructurePolicy()->skipAllBigrams(&pos); getBigramsStructurePolicy()->skipAllBigrams(&pos);
} }
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags) || PatriciaTrieReadingUtils::isBlacklisted(flags) ||

View File

@ -58,11 +58,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbability(const int unigramProbability, const int bigramProbability) const;
int getUnigramProbabilityOfPtNode(const int nodePos) const; int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
int getShortcutPositionOfNode(const int nodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfNode(const int nodePos) const; int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy; return &mHeaderPolicy;
@ -121,7 +121,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const BigramListPolicy mBigramListPolicy; const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy; const ShortcutListPolicy mShortcutListPolicy;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const; DicNodeVector *const childDicNodes) const;
}; };
} // namespace latinime } // namespace latinime