Merge "GC step 2. Finding garbage bigram entries."

This commit is contained in:
Keisuke Kuroyanagi 2013-09-20 11:11:55 +00:00 committed by Android (Google) Code Review
commit b71f63bc1c
19 changed files with 383 additions and 263 deletions

View file

@ -147,7 +147,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
}
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,

View file

@ -68,7 +68,7 @@ class MultiBigramMap {
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nodePos) {
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
@ -112,7 +112,7 @@ class MultiBigramMap {
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {

View file

@ -52,9 +52,9 @@ class DictionaryStructureWithBufferPolicy {
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;

View file

@ -223,7 +223,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
BinaryDictionaryShortcutIterator shortcutIt(
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
traverseSession->getDictionaryStructurePolicy()
->getShortcutPositionOfNode(terminalDicNode->getPos()));
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);

View file

@ -33,10 +33,9 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const {
const BigramListReadWriteUtils::BigramFlags flags =
BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
mBigramsBuf, flags, pos);
BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
outBigramPos, pos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
}

View file

@ -17,6 +17,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@ -38,23 +39,31 @@ const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf,
int *const pos) {
return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
int *const outTargetPtNodePos, int *const bigramEntryPos) {
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
bigramEntryPos);
if (outBigramFlags) {
*outBigramFlags = bigramFlags;
}
const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
bigramEntryPos);
if (outTargetPtNodePos) {
*outTargetPtNodePos = targetPos;
}
}
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
int *const pos) {
BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
while (hasNext(flags)) {
*pos += attributeAddressSize(flags);
flags = getFlagsAndForwardPointer(bigramsBuf, pos);
}
*pos += attributeAddressSize(flags);
int *const bigramListPos) {
BigramFlags flags;
do {
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
bigramListPos);
} while(hasNext(flags));
}
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
int offset = 0;
const int origin = *pos;
@ -79,4 +88,59 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
}
}
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
const bool hasNext, int *const writingPos) {
BigramFlags flags;
if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
return false;
}
return writeBigramEntry(buffer, flags, targetPos, writingPos);
}
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
const int targetPtNodePos, int *const writingPos) {
if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
return false;
}
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
const uint32_t absOffest = abs(offset);
const int bigramTargetFieldSize = attributeAddressSize(flags);
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
writingPos);
}
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
const int targetPos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int targetFieldPos = entryPos + 1;
const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outBigramFlags = flags;
return true;
}
} // namespace latinime

View file

@ -24,11 +24,15 @@
namespace latinime {
class BufferWithExtendableBuffer;
class BigramListReadWriteUtils {
public:
typedef uint8_t BigramFlags;
static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos);
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
int *const bigramEntryPos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
return flags & MASK_ATTRIBUTE_PROBABILITY;
@ -39,10 +43,7 @@ public:
}
// Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos);
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
// Returns the size of the bigram position field that is stored in bigram flags.
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
@ -67,48 +68,11 @@ public:
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
}
// Returns true if the bigram entry is valid and put entry values into out*.
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
const int entryPos, const int targetPos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
int *const outOffsetFieldSize) {
if (targetPos == NOT_A_DICT_POS) {
return false;
}
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int targetFieldPos = entryPos + 1;
const int offset = targetPos - targetFieldPos;
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outOffsetFieldSize = 3;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
*outOffsetFieldSize = 2;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
*outOffsetFieldSize = 1;
}
static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
const int targetPos, const int probability, const bool hasNext, int *const writingPos);
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outOffsetFieldSize = 3;
*outBigramFlags = flags;
*outOffset = absOffest;
return true;
}
static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
const int targetOffset, int *const writingPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@ -122,9 +86,16 @@ private:
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
// Returns true if the bigram entry is valid and put entry flags into out*.
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
}
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos);
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H

View file

@ -16,41 +16,47 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
bool *const outHasNext, int *const bigramEntryPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
*bigramEntryPos -= mBuffer->getOriginalBufferSize();
}
const BigramListReadWriteUtils::BigramFlags flags =
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, pos);
BigramListReadWriteUtils::BigramFlags bigramFlags;
int originalBigramPos;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
&originalBigramPos, bigramEntryPos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
*bigramEntryPos += mBuffer->getOriginalBufferSize();
}
}
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
*bigramListPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
*bigramListPos += mBuffer->getOriginalBufferSize();
}
}
@ -61,13 +67,19 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
*fromPos -= mBuffer->getOriginalBufferSize();
}
*outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, fromPos);
int originalBigramPos;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
fromPos);
if (originalBigramPos == NOT_A_DICT_POS) {
// skip invalid bigram entry.
continue;
@ -76,132 +88,163 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
BigramListReadWriteUtils::BigramFlags newBigramFlags;
uint32_t newBigramOffset;
int newBigramOffsetFieldSize;
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
*toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
&newBigramOffsetFieldSize)) {
continue;
}
// Write bigram entry. Target buffer is always the additional buffer.
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
toPos)) {
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
return false;
}
(*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags));
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();
}
return true;
}
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
const int probability, int *const pos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
// has been deleted or is not a valid terminal.
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*pos -= mBuffer->getOriginalBufferSize();
*bigramListPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags flags;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
int entryPos = *pos;
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int bigramEntryPos = *bigramListPos;
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
bigramListPos);
if (usesAdditionalBuffer) {
bigramEntryPos += mBuffer->getOriginalBufferSize();
}
if (originalBigramPos == NOT_A_DICT_POS) {
// This entry has already been removed.
continue;
}
if (usesAdditionalBuffer) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
// TODO: Update probability for supporting probability decaying.
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
return false;
}
}
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
return true;
}
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
const int probability, int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int entryPos = *bigramListPos;
if (usesAdditionalBuffer) {
entryPos += mBuffer->getOriginalBufferSize();
}
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, pos);
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
bigramListPos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
// Update this bigram entry.
const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
originalBigramPos, &entryPos);
}
if (BigramListReadWriteUtils::hasNext(flags)) {
if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
continue;
}
// The current last entry is found.
// First, update the flags of the last entry.
const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setHasNextFlag(flags);
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
&entryPos)) {
return false;
}
if (usesAdditionalBuffer) {
*pos += mBuffer->getOriginalBufferSize();
*bigramListPos += mBuffer->getOriginalBufferSize();
}
// Then, add a new entry after the last entry.
return writeNewBigramEntry(bigramPos, probability, pos);
} while(BigramListReadWriteUtils::hasNext(flags));
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
// We return directly from the while loop.
ASSERT(false);
return false;
}
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos) {
BigramListReadWriteUtils::BigramFlags newBigramFlags;
uint32_t newBigramOffset;
int newBigramOffsetFieldSize;
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
*writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
&newBigramOffset, &newBigramOffsetFieldSize)) {
return false;
}
// Write bigram flags.
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
return false;
}
// Write bigram positon offset.
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
writingPos)) {
return false;
}
return true;
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
probability, false /* hasNext */, writingPos);
}
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
int pos = bigramListPos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
// The buffer address can be changed after calling buffer writing methods.
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
int bigramOffsetFieldPos = pos;
if (usesAdditionalBuffer) {
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int bigramEntryPos = pos;
int originalBigramPos;
// The buffer address can be changed after calling buffer writing methods.
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
if (usesAdditionalBuffer) {
bigramEntryPos += mBuffer->getOriginalBufferSize();
}
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
buffer, flags, &pos);
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
if (bigramPos != targetBigramPos) {
if (bigramPos != bigramTargetPos) {
continue;
}
// Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
const int bigramOffsetFieldSize = BigramListReadWriteUtils::attributeAddressSize(flags);
if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
&bigramOffsetFieldPos)) {
return false;
}
return true;
} while(BigramListReadWriteUtils::hasNext(flags));
// Target entry is found. Write an invalid target position to mark the bigram invalid.
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
return false;
}
@ -212,14 +255,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
}
int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(currentPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
int bigramLinkCount = 0;
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
currentPos = nodeReader.getBigramLinkedNodePos();
nodeReader.fetchNodeInfoFromBuffer(currentPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
bigramLinkCount++;
if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
AKLOGE("Bigram link is invalid. start position: %d", bigramPos);
ASSERT(false);
return NOT_A_DICT_POS;
}

View file

@ -21,13 +21,12 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
class BufferWithExtendableBuffer;
class DictionaryShortcutsStructurePolicy;
/*
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
*/
@ -40,9 +39,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
~DynamicBigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const;
int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const;
void skipAllBigrams(int *const bigramListPos) const;
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
@ -50,18 +49,22 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
int *const toPos, int *const outBigramsCount) const;
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
bool writeNewBigramEntry(const int bigramPos, const int probability,
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
int *const bigramListPos);
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos);
// Return if targetBigramPos is found or not.
bool removeBigram(const int bigramListPos, const int targetBigramPos);
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
static const int BIGRAM_LINK_COUNT_LIMIT;
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;

View file

@ -20,6 +20,7 @@
#include <vector>
#include "defines.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
@ -68,6 +69,36 @@ class DynamicPatriciaTrieGcEventListeners {
int mChildrenValue;
};
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class ListenerForUpdatingBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy) {}
bool onAscend() { return true; }
bool onDescend() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
if (!node->isDeleted()) {
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
return false;
}
}
}
return true;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
DynamicBigramListPolicy *const mBigramPolicy;
};
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};

View file

@ -23,26 +23,26 @@
namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) {
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
nodePos, mBuffer->getTailPosition());
ptNodePos, mBuffer->getTailPosition());
ASSERT(false);
invalidatePtNodeInfo();
return;
}
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = nodePos;
mHeadPos = nodePos;
int pos = ptNodePos;
mHeadPos = ptNodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPos =
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS;
mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
if (outCodePoints != 0) {
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
@ -99,7 +99,8 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
// Read destination node if the read node is a moved node.
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
// The destination position is stored at the same place as the parent position.
fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints);
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
outCodePoints);
}
}

View file

@ -48,17 +48,17 @@ class DynamicPatriciaTrieNodeReader {
~DynamicPatriciaTrieNodeReader() {}
// Reads node information from dictionary buffer and updates members with the information.
AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) {
fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */,
0 /* outCodePoints */);
// Reads PtNode information from dictionary buffer and updates members with the information.
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
0 /* maxCodePointCount */, 0 /* outCodePoints */);
}
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
const int maxCodePointCount, int *const outCodePoints) {
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
mSiblingPos = NOT_A_DICT_POS;
mBigramLinkedNodePos = NOT_A_DICT_POS;
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
}
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
@ -154,8 +154,8 @@ class DynamicPatriciaTrieNodeReader {
int mBigramPos;
int mSiblingPos;
void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount,
int *const outCodePoints);
void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int maxCodePointCount, int *const outCodePoints);
void invalidatePtNodeInfo();
};

View file

@ -35,7 +35,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
@ -48,7 +48,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
// This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order.
@ -56,9 +56,9 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read the terminal node and get its probability.
readingHelper.initWithNodePos(nodePos);
readingHelper.initWithPtNodePos(ptNodePos);
if (!readingHelper.isValidTerminalNode()) {
// Node at the nodePos is not a valid terminal node.
// Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
@ -67,7 +67,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// Then, following parent node link to the dictionary root and fetch node code points.
while (!readingHelper.isEnd()) {
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
// The nodePos is not a valid terminal node position in the dictionary.
// The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
@ -98,7 +98,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(getRootPosition());
readingHelper.initWithPtNodeArrayPos(getRootPosition());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) {
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
@ -148,39 +148,39 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
}
}
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
}
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
}
return nodeReader.getShortcutPos();
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
}
@ -195,7 +195,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithNodeArrayPos(getRootPosition());
readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy);
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);

View file

@ -51,7 +51,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(const int *const inWord,
@ -59,11 +59,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const;
int getUnigramProbabilityOfPtNode(const int nodePos) const;
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfNode(const int nodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;

View file

@ -72,7 +72,7 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
@ -123,7 +123,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link.
mReadingState.mPos += forwardLinkPosition;
nextNodeArray();
nextPtNodeArray();
} else {
// All node arrays have been read.
mReadingState.mPos = NOT_A_DICT_POS;

View file

@ -73,32 +73,32 @@ class DynamicPatriciaTrieReadingHelper {
return mReadingState.mPos == NOT_A_DICT_POS;
}
// Initialize reading state with the head position of a node array.
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
if (nodeArrayPos == NOT_A_DICT_POS) {
// Initialize reading state with the head position of a PtNode array.
AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
if (ptNodeArrayPos == NOT_A_DICT_POS) {
mReadingState.mPos = NOT_A_DICT_POS;
} else {
mIsError = false;
mReadingState.mPos = nodeArrayPos;
mReadingState.mPos = ptNodeArrayPos;
mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 0;
mReadingState.mNodeArrayCount = 0;
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear();
nextNodeArray();
nextPtNodeArray();
if (!isEnd()) {
fetchNodeInfo();
fetchPtNodeInfo();
}
}
}
// Initialize reading state with the head position of a node.
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
if (nodePos == NOT_A_DICT_POS) {
AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
if (ptNodePos == NOT_A_DICT_POS) {
mReadingState.mPos = NOT_A_DICT_POS;
} else {
mIsError = false;
mReadingState.mPos = nodePos;
mReadingState.mPos = ptNodePos;
mReadingState.mNodeCount = 1;
mReadingState.mPrevTotalCodePointCount = 0;
mReadingState.mTotalNodeCount = 1;
@ -106,7 +106,7 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
fetchNodeInfo();
fetchPtNodeInfo();
}
}
@ -151,10 +151,10 @@ class DynamicPatriciaTrieReadingHelper {
// All nodes in the current node array have been read.
followForwardLink();
if (!isEnd()) {
fetchNodeInfo();
fetchPtNodeInfo();
}
} else {
fetchNodeInfo();
fetchPtNodeInfo();
}
}
@ -167,9 +167,9 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPos = mNodeReader.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array.
nextNodeArray();
nextPtNodeArray();
if (!isEnd()) {
fetchNodeInfo();
fetchPtNodeInfo();
}
} else {
mReadingState.mPos = NOT_A_DICT_POS;
@ -186,7 +186,7 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPos = mNodeReader.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
fetchNodeInfo();
fetchPtNodeInfo();
} else {
mReadingState.mPos = NOT_A_DICT_POS;
}
@ -202,7 +202,7 @@ class DynamicPatriciaTrieReadingHelper {
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
if (!isEnd()) {
fetchNodeInfo();
fetchPtNodeInfo();
}
}
@ -240,12 +240,12 @@ class DynamicPatriciaTrieReadingHelper {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
std::vector<ReadingState> mReadingStateStack;
void nextNodeArray();
void nextPtNodeArray();
void followForwardLink();
AK_FORCE_INLINE void fetchNodeInfo() {
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
AK_FORCE_INLINE void fetchPtNodeInfo() {
mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
MAX_WORD_LENGTH, mMergedNodeCodePoints);
if (mNodeReader.getCodePointCount() <= 0) {
// Empty node is not allowed.
@ -271,7 +271,7 @@ class DynamicPatriciaTrieReadingHelper {
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
fetchNodeInfo();
fetchPtNodeInfo();
}
}
};

View file

@ -90,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
const int probability) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
mMergedNodeCodePoints);
// Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition();
@ -104,7 +104,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
&writingPos)) {
return false;
}
nodeReader.fetchNodeInfoFromBuffer(newNodePos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos();
@ -131,7 +131,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
// Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
@ -217,7 +217,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
// Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
while (!readingHelper.isEnd()) {
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
const int parentOffset = movedPos - childPtNodeWrittenPos;
@ -452,7 +452,7 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
}
// Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
// Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
@ -519,7 +519,7 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
BufferWithExtendableBuffer *const bufferToWrite) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
@ -528,6 +528,14 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
return false;
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
listenerForupdatingBigramProbability(mBigramPolicy);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&listenerForupdatingBigramProbability)) {
return false;
}
// TODO: Implement.
return false;
}

View file

@ -49,7 +49,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
// than the position we look for, and we have to descend the z node).
/* Parameters :
* nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
* what is stored as the "bigram position" in each bigram)
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
* outUnigramProbability: a pointer to an int to write the probability into.
@ -57,7 +57,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
*/
// TODO: Split this function to be more readable
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
int pos = getRootPosition();
int wordPos = 0;
@ -78,7 +78,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
mDictRoot, &pos);
if (nodePos == startPos) {
if (ptNodePos == startPos) {
// We found the position. Copy the rest of the code points in the buffer and return
// the length.
outCodePoints[wordPos] = character;
@ -121,7 +121,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// Here comes the tricky part. First, read the children position.
const int childrenPos = PatriciaTrieReadingUtils
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
if (childrenPos > nodePos) {
if (childrenPos > ptNodePos) {
// If the children pos is greater than the position, it means the previous
// PtNode, which position is stored in lastCandidatePtNodePos, was the right
// one.
@ -213,7 +213,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
}
}
// If we have looked through all the PtNodes and found no match, the nodePos is
// If we have looked through all the PtNodes and found no match, the ptNodePos is
// not the position of a terminal in this dictionary.
return 0;
}
@ -319,11 +319,11 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
}
}
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
int pos = nodePos;
int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
@ -341,11 +341,11 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
mDictRoot, &pos), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
int pos = nodePos;
int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
@ -361,11 +361,11 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
return pos;
}
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_DICT_POS) {
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
int pos = nodePos;
int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
@ -385,8 +385,8 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int nodePos, DicNodeVector *childDicNodes) const {
int pos = nodePos;
const int ptNodePos, DicNodeVector *childDicNodes) const {
int pos = ptNodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
@ -404,7 +404,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
getBigramsStructurePolicy()->skipAllBigrams(&pos);
}
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags),
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags) ||

View file

@ -58,11 +58,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getProbability(const int unigramProbability, const int bigramProbability) const;
int getUnigramProbabilityOfPtNode(const int nodePos) const;
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfNode(const int nodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy;
@ -121,7 +121,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const;
};
} // namespace latinime