Merge "GC step 2. Finding garbage bigram entries."
This commit is contained in:
commit
b71f63bc1c
19 changed files with 383 additions and 263 deletions
|
@ -147,7 +147,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
|||
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
||||
forceLowerCaseSearch);
|
||||
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
||||
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
|
||||
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
||||
}
|
||||
|
||||
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||
|
|
|
@ -68,7 +68,7 @@ class MultiBigramMap {
|
|||
|
||||
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||
const int nodePos) {
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||
bigramsListPos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
|
@ -112,7 +112,7 @@ class MultiBigramMap {
|
|||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
||||
const int nextWordPosition, const int unigramProbability) {
|
||||
int bigramProbability = NOT_A_PROBABILITY;
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||
bigramsListPos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
|
|
|
@ -52,9 +52,9 @@ class DictionaryStructureWithBufferPolicy {
|
|||
|
||||
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
|
||||
|
||||
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
|
||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||
|
||||
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
|
||||
virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
|
||||
|
||||
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
|
||||
|
||||
|
|
|
@ -223,7 +223,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
|||
BinaryDictionaryShortcutIterator shortcutIt(
|
||||
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
||||
traverseSession->getDictionaryStructurePolicy()
|
||||
->getShortcutPositionOfNode(terminalDicNode->getPos()));
|
||||
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
|
||||
// Shortcut is not supported for multiple words suggestions.
|
||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||
|
|
|
@ -33,10 +33,9 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
|
||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||
int *const pos) const {
|
||||
const BigramListReadWriteUtils::BigramFlags flags =
|
||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
|
||||
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
mBigramsBuf, flags, pos);
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
|
||||
outBigramPos, pos);
|
||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -38,23 +39,31 @@ const BigramListReadWriteUtils::BigramFlags
|
|||
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||
|
||||
/* static */ BigramListReadWriteUtils::BigramFlags
|
||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf,
|
||||
int *const pos) {
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
|
||||
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
||||
int *const outTargetPtNodePos, int *const bigramEntryPos) {
|
||||
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
|
||||
bigramEntryPos);
|
||||
if (outBigramFlags) {
|
||||
*outBigramFlags = bigramFlags;
|
||||
}
|
||||
const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
|
||||
bigramEntryPos);
|
||||
if (outTargetPtNodePos) {
|
||||
*outTargetPtNodePos = targetPos;
|
||||
}
|
||||
}
|
||||
|
||||
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
||||
int *const pos) {
|
||||
BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
|
||||
while (hasNext(flags)) {
|
||||
*pos += attributeAddressSize(flags);
|
||||
flags = getFlagsAndForwardPointer(bigramsBuf, pos);
|
||||
}
|
||||
*pos += attributeAddressSize(flags);
|
||||
int *const bigramListPos) {
|
||||
BigramFlags flags;
|
||||
do {
|
||||
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
|
||||
bigramListPos);
|
||||
} while(hasNext(flags));
|
||||
}
|
||||
|
||||
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
|
||||
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
|
||||
int offset = 0;
|
||||
const int origin = *pos;
|
||||
|
@ -79,4 +88,59 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
|||
}
|
||||
}
|
||||
|
||||
// Creates the flags for a new bigram entry and writes the complete entry into buffer.
// The flags are built by createAndGetBigramFlags() using *writingPos as the entry position,
// and the entry is then written by writeBigramEntry(), which performs its writes through
// writingPos.
// Returns false when the flags cannot be created or when writeBigramEntry() fails.
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
|
||||
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
|
||||
const bool hasNext, int *const writingPos) {
|
||||
BigramFlags flags;
|
||||
if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
|
||||
return false;
|
||||
}
|
||||
return writeBigramEntry(buffer, flags, targetPos, writingPos);
|
||||
}
|
||||
|
||||
// Writes one bigram entry through writingPos: first the flags as a 1-byte field, then the
// target offset in a field whose size is taken from the flags via attributeAddressSize().
// Returns false as soon as one of the two writes fails.
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
|
||||
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
|
||||
const int targetPtNodePos, int *const writingPos) {
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// The offset is computed against *writingPos as it stands after the flags write above.
// NOT_A_DICT_POS is stored as offset 0.
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
|
||||
// Only the magnitude is written here; the sign is not stored in this field.
// NOTE(review): presumably the caller-supplied flags carry the sign bit - confirm at call sites.
const uint32_t absOffest = abs(offset);
|
||||
const int bigramTargetFieldSize = attributeAddressSize(flags);
|
||||
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
|
||||
writingPos);
|
||||
}
|
||||
|
||||
// Builds the flags for a bigram entry located at entryPos and stores them in *outBigramFlags.
// Returns false, leaving *outBigramFlags untouched, when the magnitude of the offset to
// targetPos does not fit in 24 bits; returns true otherwise.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
|
||||
const int targetPos, const int probability, const bool hasNext,
|
||||
BigramFlags *const outBigramFlags) {
|
||||
// Only the bits of probability selected by MASK_ATTRIBUTE_PROBABILITY are kept.
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
||||
if (hasNext) {
|
||||
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
||||
}
|
||||
// The offset is measured from entryPos + 1 rather than from entryPos itself.
// NOT_A_DICT_POS yields offset 0.
const int targetFieldPos = entryPos + 1;
|
||||
const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
|
||||
if (offset < 0) {
|
||||
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
||||
}
|
||||
const uint32_t absOffest = abs(offset);
|
||||
// Pick the address type from the number of bytes needed for the offset magnitude.
if ((absOffest >> 24) != 0) {
|
||||
// Offset is too large.
|
||||
return false;
|
||||
} else if ((absOffest >> 16) != 0) {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||
} else if ((absOffest >> 8) != 0) {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
||||
} else {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||
}
|
||||
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
|
||||
// writing.
|
||||
// TODO: Remove following 2 lines and optimize memory space.
|
||||
// This overrides the address type chosen above, so every entry uses the 3-byte type for now.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||
*outBigramFlags = flags;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -24,11 +24,15 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
|
||||
class BigramListReadWriteUtils {
|
||||
public:
|
||||
typedef uint8_t BigramFlags;
|
||||
|
||||
static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos);
|
||||
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
||||
int *const bigramEntryPos);
|
||||
|
||||
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
||||
return flags & MASK_ATTRIBUTE_PROBABILITY;
|
||||
|
@ -39,10 +43,7 @@ public:
|
|||
}
|
||||
|
||||
// Bigrams reading methods
|
||||
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos);
|
||||
|
||||
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
|
||||
const BigramFlags flags, int *const pos);
|
||||
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
||||
|
||||
// Returns the size of the bigram position field that is stored in bigram flags.
|
||||
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
||||
|
@ -67,48 +68,11 @@ public:
|
|||
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
|
||||
}
|
||||
|
||||
// Returns true if the bigram entry is valid and put entry values into out*.
|
||||
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||
const int entryPos, const int targetPos, const int probability, const bool hasNext,
|
||||
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
|
||||
int *const outOffsetFieldSize) {
|
||||
if (targetPos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
||||
if (hasNext) {
|
||||
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
||||
}
|
||||
const int targetFieldPos = entryPos + 1;
|
||||
const int offset = targetPos - targetFieldPos;
|
||||
if (offset < 0) {
|
||||
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
||||
}
|
||||
const uint32_t absOffest = abs(offset);
|
||||
if ((absOffest >> 24) != 0) {
|
||||
// Offset is too large.
|
||||
return false;
|
||||
} else if ((absOffest >> 16) != 0) {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||
*outOffsetFieldSize = 3;
|
||||
} else if ((absOffest >> 8) != 0) {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
||||
*outOffsetFieldSize = 2;
|
||||
} else {
|
||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||
*outOffsetFieldSize = 1;
|
||||
}
|
||||
static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
|
||||
const int targetPos, const int probability, const bool hasNext, int *const writingPos);
|
||||
|
||||
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
|
||||
// writing.
|
||||
// TODO: Remove following 2 lines and optimize memory space.
|
||||
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||
*outOffsetFieldSize = 3;
|
||||
|
||||
*outBigramFlags = flags;
|
||||
*outOffset = absOffest;
|
||||
return true;
|
||||
}
|
||||
static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
|
||||
const int targetOffset, int *const writingPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||
|
@ -122,9 +86,16 @@ private:
|
|||
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
|
||||
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
||||
|
||||
// Returns true if the bigram entry is valid and puts the entry flags into out*.
|
||||
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
|
||||
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
|
||||
|
||||
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||
}
|
||||
|
||||
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||
const BigramFlags flags, int *const pos);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||
|
|
|
@ -16,41 +16,47 @@
|
|||
|
||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
|
||||
const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
|
||||
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
|
||||
|
||||
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const pos) const {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
|
||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos -= mBuffer->getOriginalBufferSize();
|
||||
*bigramEntryPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const BigramListReadWriteUtils::BigramFlags flags =
|
||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
buffer, flags, pos);
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int originalBigramPos;
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
|
||||
&originalBigramPos, bigramEntryPos);
|
||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
||||
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos += mBuffer->getOriginalBufferSize();
|
||||
*bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
}
|
||||
|
||||
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||
void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos -= mBuffer->getOriginalBufferSize();
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
|
||||
BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos += mBuffer->getOriginalBufferSize();
|
||||
*bigramListPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,13 +67,19 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
|
|||
*fromPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
*outBigramsCount = 0;
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
|
||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
buffer, flags, fromPos);
|
||||
int originalBigramPos;
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||
fromPos);
|
||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||
// skip invalid bigram entry.
|
||||
continue;
|
||||
|
@ -76,132 +88,163 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
|
|||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||
uint32_t newBigramOffset;
|
||||
int newBigramOffsetFieldSize;
|
||||
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||
*toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
|
||||
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
|
||||
&newBigramOffsetFieldSize)) {
|
||||
continue;
|
||||
}
|
||||
// Write bigram entry. Target buffer is always the additional buffer.
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
||||
toPos)) {
|
||||
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
|
||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
|
||||
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
|
||||
return false;
|
||||
}
|
||||
(*outBigramsCount)++;
|
||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
if (usesAdditionalBuffer) {
|
||||
*fromPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
|
||||
const int probability, int *const pos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
||||
// Finds useless bigram entries and invalidates them. A bigram entry is useless when the target PtNode
|
||||
// has been deleted or is not a valid terminal.
|
||||
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
int *const bigramListPos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos -= mBuffer->getOriginalBufferSize();
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
int entryPos = *pos;
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = *bigramListPos;
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||
bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||
// This entry has already been removed.
|
||||
continue;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int bigramTargetNodePos =
|
||||
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
|
||||
// TODO: Update probability for supporting probability decaying.
|
||||
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|
||||
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
||||
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
|
||||
const int probability, int *const bigramListPos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int entryPos = *bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
entryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
buffer, flags, pos);
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||
bigramListPos);
|
||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
|
||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
||||
// Update this bigram entry.
|
||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
|
||||
return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
|
||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
||||
originalBigramPos, &entryPos);
|
||||
}
|
||||
if (BigramListReadWriteUtils::hasNext(flags)) {
|
||||
if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
|
||||
continue;
|
||||
}
|
||||
// The current last entry is found.
|
||||
// First, update the flags of the last entry.
|
||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||
BigramListReadWriteUtils::setHasNextFlag(flags);
|
||||
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
|
||||
BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
|
||||
&entryPos)) {
|
||||
return false;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
*pos += mBuffer->getOriginalBufferSize();
|
||||
*bigramListPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Then, add a new entry after the last entry.
|
||||
return writeNewBigramEntry(bigramPos, probability, pos);
|
||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
// We return directly from the while loop.
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
|
||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||
int *const writingPos) {
|
||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
||||
uint32_t newBigramOffset;
|
||||
int newBigramOffsetFieldSize;
|
||||
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
||||
*writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
|
||||
&newBigramOffset, &newBigramOffsetFieldSize)) {
|
||||
return false;
|
||||
}
|
||||
// Write bigram flags.
|
||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write bigram position offset.
|
||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
||||
writingPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
||||
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
||||
probability, false /* hasNext */, writingPos);
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
|
||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
||||
int pos = bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
|
||||
int bigramOffsetFieldPos = pos;
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = pos;
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
||||
buffer, flags, &pos);
|
||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
if (bigramPos != targetBigramPos) {
|
||||
if (bigramPos != bigramTargetPos) {
|
||||
continue;
|
||||
}
|
||||
// Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
|
||||
const int bigramOffsetFieldSize = BigramListReadWriteUtils::attributeAddressSize(flags);
|
||||
if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
|
||||
&bigramOffsetFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||
// Target entry is found. Write an invalid target position to mark the bigram invalid.
|
||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -212,14 +255,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
|||
}
|
||||
int currentPos = originalBigramPos;
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
||||
int bigramLinkCount = 0;
|
||||
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
|
||||
currentPos = nodeReader.getBigramLinkedNodePos();
|
||||
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
||||
bigramLinkCount++;
|
||||
if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
|
||||
AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
|
||||
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
|
||||
AKLOGE("Bigram link is invalid. start position: %d", bigramPos);
|
||||
ASSERT(false);
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
|
|
|
@ -21,13 +21,12 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
class DictionaryShortcutsStructurePolicy;
|
||||
|
||||
/*
|
||||
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
|
||||
*/
|
||||
|
@ -40,9 +39,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
~DynamicBigramListPolicy() {}
|
||||
|
||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||
int *const pos) const;
|
||||
int *const bigramEntryPos) const;
|
||||
|
||||
void skipAllBigrams(int *const pos) const;
|
||||
void skipAllBigrams(int *const bigramListPos) const;
|
||||
|
||||
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
|
||||
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
|
||||
|
@ -50,18 +49,22 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
|
||||
int *const toPos, int *const outBigramsCount) const;
|
||||
|
||||
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
||||
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
|
||||
|
||||
bool writeNewBigramEntry(const int bigramPos, const int probability,
|
||||
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
|
||||
int *const bigramListPos);
|
||||
|
||||
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||
int *const writingPos);
|
||||
|
||||
// Return if targetBigramPos is found or not.
|
||||
bool removeBigram(const int bigramListPos, const int targetBigramPos);
|
||||
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||
|
||||
static const int BIGRAM_LINK_COUNT_LIMIT;
|
||||
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
|
||||
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
|
||||
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
||||
|
@ -68,6 +69,36 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
int mChildrenValue;
|
||||
};
|
||||
|
||||
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
|
||||
// entries.
|
||||
class ListenerForUpdatingBigramProbability
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
|
||||
: mBigramPolicy(bigramPolicy) {}
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
||||
bool onDescend() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
|
||||
if (!node->isDeleted()) {
|
||||
int pos = node->getBigramsPos();
|
||||
if (pos != NOT_A_DICT_POS) {
|
||||
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
|
||||
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
};
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
||||
};
|
||||
|
|
|
@ -23,26 +23,26 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
||||
const int maxCodePointCount, int *const outCodePoints) {
|
||||
if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) {
|
||||
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
|
||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
||||
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
|
||||
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
|
||||
nodePos, mBuffer->getTailPosition());
|
||||
ptNodePos, mBuffer->getTailPosition());
|
||||
ASSERT(false);
|
||||
invalidatePtNodeInfo();
|
||||
return;
|
||||
}
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
int pos = nodePos;
|
||||
mHeadPos = nodePos;
|
||||
int pos = ptNodePos;
|
||||
mHeadPos = ptNodePos;
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const int parentPos =
|
||||
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
||||
mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS;
|
||||
mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
|
||||
if (outCodePoints != 0) {
|
||||
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
|
||||
|
@ -99,7 +99,8 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
|||
// Read destination node if the read node is a moved node.
|
||||
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
||||
// The destination position is stored at the same place as the parent position.
|
||||
fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints);
|
||||
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
|
||||
outCodePoints);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -48,17 +48,17 @@ class DynamicPatriciaTrieNodeReader {
|
|||
|
||||
~DynamicPatriciaTrieNodeReader() {}
|
||||
|
||||
// Reads node information from dictionary buffer and updates members with the information.
|
||||
AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) {
|
||||
fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */,
|
||||
0 /* outCodePoints */);
|
||||
// Reads PtNode information from dictionary buffer and updates members with the information.
|
||||
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
|
||||
fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
|
||||
0 /* maxCodePointCount */, 0 /* outCodePoints */);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
|
||||
const int maxCodePointCount, int *const outCodePoints) {
|
||||
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
|
||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
||||
mSiblingPos = NOT_A_DICT_POS;
|
||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
||||
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
|
||||
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
|
||||
}
|
||||
|
||||
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
||||
|
@ -154,8 +154,8 @@ class DynamicPatriciaTrieNodeReader {
|
|||
int mBigramPos;
|
||||
int mSiblingPos;
|
||||
|
||||
void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount,
|
||||
int *const outCodePoints);
|
||||
void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||
const int maxCodePointCount, int *const outCodePoints);
|
||||
|
||||
void invalidatePtNodeInfo();
|
||||
};
|
||||
|
|
|
@ -35,7 +35,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
|||
}
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
while (!readingHelper.isEnd()) {
|
||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
||||
|
@ -48,7 +48,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
|||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const {
|
||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||
// node code points are stored in the buffer in the reverse order.
|
||||
|
@ -56,9 +56,9 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
|||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
// First, read the terminal node and get its probability.
|
||||
readingHelper.initWithNodePos(nodePos);
|
||||
readingHelper.initWithPtNodePos(ptNodePos);
|
||||
if (!readingHelper.isValidTerminalNode()) {
|
||||
// Node at the nodePos is not a valid terminal node.
|
||||
// Node at the ptNodePos is not a valid terminal node.
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
|||
// Then, following parent node link to the dictionary root and fetch node code points.
|
||||
while (!readingHelper.isEnd()) {
|
||||
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
||||
// The nodePos is not a valid terminal node position in the dictionary.
|
||||
// The ptNodePos is not a valid terminal node position in the dictionary.
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
|||
}
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
while (!readingHelper.isEnd()) {
|
||||
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
||||
|
@ -148,39 +148,39 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
|||
}
|
||||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (nodeReader.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return nodeReader.getShortcutPos();
|
||||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (nodeReader.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
|
@ -195,7 +195,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
|||
}
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&mBigramListPolicy, &mShortcutListPolicy);
|
||||
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
|
||||
|
|
|
@ -51,7 +51,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
DicNodeVector *const childDicNodes) const;
|
||||
|
||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const;
|
||||
|
||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||
|
@ -59,11 +59,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getUnigramProbabilityOfPtNode(const int nodePos) const;
|
||||
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getShortcutPositionOfNode(const int nodePos) const;
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getBigramsPositionOfNode(const int nodePos) const;
|
||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||
return &mHeaderPolicy;
|
||||
|
|
|
@ -72,7 +72,7 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
|||
|
||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||
// method to avoid an infinite loop.
|
||||
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
|
@ -123,7 +123,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
|||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||
// Follow the forward link.
|
||||
mReadingState.mPos += forwardLinkPosition;
|
||||
nextNodeArray();
|
||||
nextPtNodeArray();
|
||||
} else {
|
||||
// All node arrays have been read.
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
|
|
|
@ -73,32 +73,32 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
return mReadingState.mPos == NOT_A_DICT_POS;
|
||||
}
|
||||
|
||||
// Initialize reading state with the head position of a node array.
|
||||
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
||||
if (nodeArrayPos == NOT_A_DICT_POS) {
|
||||
// Initialize reading state with the head position of a PtNode array.
|
||||
AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
|
||||
if (ptNodeArrayPos == NOT_A_DICT_POS) {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
} else {
|
||||
mIsError = false;
|
||||
mReadingState.mPos = nodeArrayPos;
|
||||
mReadingState.mPos = ptNodeArrayPos;
|
||||
mReadingState.mPrevTotalCodePointCount = 0;
|
||||
mReadingState.mTotalNodeCount = 0;
|
||||
mReadingState.mNodeArrayCount = 0;
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingStateStack.clear();
|
||||
nextNodeArray();
|
||||
nextPtNodeArray();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize reading state with the head position of a node.
|
||||
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
} else {
|
||||
mIsError = false;
|
||||
mReadingState.mPos = nodePos;
|
||||
mReadingState.mPos = ptNodePos;
|
||||
mReadingState.mNodeCount = 1;
|
||||
mReadingState.mPrevTotalCodePointCount = 0;
|
||||
mReadingState.mTotalNodeCount = 1;
|
||||
|
@ -106,7 +106,7 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
mReadingStateStack.clear();
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -151,10 +151,10 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
// All nodes in the current node array have been read.
|
||||
followForwardLink();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
} else {
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,9 +167,9 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
mReadingState.mPos = mNodeReader.getChildrenPos();
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
// Read children node array.
|
||||
nextNodeArray();
|
||||
nextPtNodeArray();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
} else {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
|
@ -186,7 +186,7 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
mReadingState.mPos = mNodeReader.getParentPos();
|
||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
} else {
|
||||
mReadingState.mPos = NOT_A_DICT_POS;
|
||||
}
|
||||
|
@ -202,7 +202,7 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
|
||||
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -240,12 +240,12 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
std::vector<ReadingState> mReadingStateStack;
|
||||
|
||||
void nextNodeArray();
|
||||
void nextPtNodeArray();
|
||||
|
||||
void followForwardLink();
|
||||
|
||||
AK_FORCE_INLINE void fetchNodeInfo() {
|
||||
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
|
||||
AK_FORCE_INLINE void fetchPtNodeInfo() {
|
||||
mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
|
||||
MAX_WORD_LENGTH, mMergedNodeCodePoints);
|
||||
if (mNodeReader.getCodePointCount() <= 0) {
|
||||
// Empty node is not allowed.
|
||||
|
@ -271,7 +271,7 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
} else {
|
||||
mReadingState = mReadingStateStack.back();
|
||||
mReadingStateStack.pop_back();
|
||||
fetchNodeInfo();
|
||||
fetchPtNodeInfo();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -90,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
|||
const int probability) {
|
||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
||||
mMergedNodeCodePoints);
|
||||
// Move node to add bigram entry.
|
||||
const int newNodePos = mBuffer->getTailPosition();
|
||||
|
@ -104,7 +104,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
|||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
nodeReader.fetchNodeInfoFromBuffer(newNodePos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
|
||||
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
|
||||
// Insert a new bigram entry into the existing bigram list.
|
||||
int bigramListPos = nodeReader.getBigramsPos();
|
||||
|
@ -131,7 +131,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
|||
// Remove a bigram relation from word0Pos to word1Pos.
|
||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
|
||||
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
|||
// Update children's parent position.
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
|
||||
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
|
||||
while (!readingHelper.isEnd()) {
|
||||
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
|
||||
const int parentOffset = movedPos - childPtNodeWrittenPos;
|
||||
|
@ -452,7 +452,7 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
|||
}
|
||||
// Load node info. Information of the 1st part will be fetched.
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
|
||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
|
||||
// Update children position.
|
||||
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||
|
@ -519,7 +519,7 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
|
|||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||
BufferWithExtendableBuffer *const bufferToWrite) {
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners
|
||||
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||
|
@ -528,6 +528,14 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
|
||||
listenerForupdatingBigramProbability(mBigramPolicy);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&listenerForupdatingBigramProbability)) {
|
||||
return false;
|
||||
}
|
||||
// TODO: Implement.
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
|||
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
||||
// than the position we look for, and we have to descend the z node).
|
||||
/* Parameters :
|
||||
* nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
||||
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
||||
* what is stored as the "bigram position" in each bigram)
|
||||
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
|
||||
* outUnigramProbability: a pointer to an int to write the probability into.
|
||||
|
@ -57,7 +57,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
|||
*/
|
||||
// TODO: Split this function to be more readable
|
||||
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const {
|
||||
int pos = getRootPosition();
|
||||
int wordPos = 0;
|
||||
|
@ -78,7 +78,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
|
||||
mDictRoot, &pos);
|
||||
if (nodePos == startPos) {
|
||||
if (ptNodePos == startPos) {
|
||||
// We found the position. Copy the rest of the code points in the buffer and return
|
||||
// the length.
|
||||
outCodePoints[wordPos] = character;
|
||||
|
@ -121,7 +121,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
// Here comes the tricky part. First, read the children position.
|
||||
const int childrenPos = PatriciaTrieReadingUtils
|
||||
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
|
||||
if (childrenPos > nodePos) {
|
||||
if (childrenPos > ptNodePos) {
|
||||
// If the children pos is greater than the position, it means the previous
|
||||
// PtNode, which position is stored in lastCandidatePtNodePos, was the right
|
||||
// one.
|
||||
|
@ -213,7 +213,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
|
||||
}
|
||||
}
|
||||
// If we have looked through all the PtNodes and found no match, the nodePos is
|
||||
// If we have looked through all the PtNodes and found no match, the ptNodePos is
|
||||
// not the position of a terminal in this dictionary.
|
||||
return 0;
|
||||
}
|
||||
|
@ -319,11 +319,11 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
|||
}
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
int pos = nodePos;
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
|
@ -341,11 +341,11 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
|||
mDictRoot, &pos), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
int pos = nodePos;
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
|
@ -361,11 +361,11 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
|||
return pos;
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
||||
if (nodePos == NOT_A_DICT_POS) {
|
||||
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
int pos = nodePos;
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
|
@ -385,8 +385,8 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
|||
}
|
||||
|
||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
||||
const int nodePos, DicNodeVector *childDicNodes) const {
|
||||
int pos = nodePos;
|
||||
const int ptNodePos, DicNodeVector *childDicNodes) const {
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
|
@ -404,7 +404,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
|||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
getBigramsStructurePolicy()->skipAllBigrams(&pos);
|
||||
}
|
||||
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
|
||||
PatriciaTrieReadingUtils::isTerminal(flags),
|
||||
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
|
||||
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
|
||||
|
|
|
@ -58,11 +58,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getUnigramProbabilityOfPtNode(const int nodePos) const;
|
||||
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getShortcutPositionOfNode(const int nodePos) const;
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getBigramsPositionOfNode(const int nodePos) const;
|
||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||
return &mHeaderPolicy;
|
||||
|
@ -121,7 +121,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
const BigramListPolicy mBigramListPolicy;
|
||||
const ShortcutListPolicy mShortcutListPolicy;
|
||||
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
Loading…
Reference in a new issue