Merge "GC step 2. Finding garbage bigram entries."
commit
b71f63bc1c
|
@ -147,7 +147,7 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
int pos = mDictionaryStructurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
||||||
forceLowerCaseSearch);
|
forceLowerCaseSearch);
|
||||||
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
if (NOT_A_DICT_POS == pos) return NOT_A_DICT_POS;
|
||||||
return mDictionaryStructurePolicy->getBigramsPositionOfNode(pos);
|
return mDictionaryStructurePolicy->getBigramsPositionOfPtNode(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
int BigramDictionary::getBigramProbability(const int *word0, int length0, const int *word1,
|
||||||
|
|
|
@ -68,7 +68,7 @@ class MultiBigramMap {
|
||||||
|
|
||||||
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nodePos) {
|
const int nodePos) {
|
||||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||||
bigramsListPos);
|
bigramsListPos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
|
@ -112,7 +112,7 @@ class MultiBigramMap {
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
||||||
const int nextWordPosition, const int unigramProbability) {
|
const int nextWordPosition, const int unigramProbability) {
|
||||||
int bigramProbability = NOT_A_PROBABILITY;
|
int bigramProbability = NOT_A_PROBABILITY;
|
||||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||||
bigramsListPos);
|
bigramsListPos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
|
|
|
@ -52,9 +52,9 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
|
virtual int getUnigramProbabilityOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
||||||
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
|
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
||||||
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
|
virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
||||||
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
|
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
|
||||||
|
|
||||||
|
|
|
@ -223,7 +223,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
BinaryDictionaryShortcutIterator shortcutIt(
|
BinaryDictionaryShortcutIterator shortcutIt(
|
||||||
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
traverseSession->getDictionaryStructurePolicy()->getShortcutsStructurePolicy(),
|
||||||
traverseSession->getDictionaryStructurePolicy()
|
traverseSession->getDictionaryStructurePolicy()
|
||||||
->getShortcutPositionOfNode(terminalDicNode->getPos()));
|
->getShortcutPositionOfPtNode(terminalDicNode->getPos()));
|
||||||
// Shortcut is not supported for multiple words suggestions.
|
// Shortcut is not supported for multiple words suggestions.
|
||||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||||
|
|
|
@ -33,10 +33,9 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
const BigramListReadWriteUtils::BigramFlags flags =
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
|
||||||
*outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
outBigramPos, pos);
|
||||||
mBigramsBuf, flags, pos);
|
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -38,23 +39,31 @@ const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
|
|
||||||
/* static */ BigramListReadWriteUtils::BigramFlags
|
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(const uint8_t *const bigramsBuf,
|
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
||||||
int *const pos) {
|
int *const outTargetPtNodePos, int *const bigramEntryPos) {
|
||||||
return ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, pos);
|
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
|
||||||
|
bigramEntryPos);
|
||||||
|
if (outBigramFlags) {
|
||||||
|
*outBigramFlags = bigramFlags;
|
||||||
|
}
|
||||||
|
const int targetPos = getBigramAddressAndAdvancePosition(bigramsBuf, bigramFlags,
|
||||||
|
bigramEntryPos);
|
||||||
|
if (outTargetPtNodePos) {
|
||||||
|
*outTargetPtNodePos = targetPos;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
||||||
int *const pos) {
|
int *const bigramListPos) {
|
||||||
BigramFlags flags = getFlagsAndForwardPointer(bigramsBuf, pos);
|
BigramFlags flags;
|
||||||
while (hasNext(flags)) {
|
do {
|
||||||
*pos += attributeAddressSize(flags);
|
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
|
||||||
flags = getFlagsAndForwardPointer(bigramsBuf, pos);
|
bigramListPos);
|
||||||
}
|
} while(hasNext(flags));
|
||||||
*pos += attributeAddressSize(flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
|
||||||
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
|
const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
const int origin = *pos;
|
const int origin = *pos;
|
||||||
|
@ -79,4 +88,59 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
|
||||||
|
BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
|
||||||
|
const bool hasNext, int *const writingPos) {
|
||||||
|
BigramFlags flags;
|
||||||
|
if (!createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &flags)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return writeBigramEntry(buffer, flags, targetPos, writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
|
||||||
|
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
|
||||||
|
const int targetPtNodePos, int *const writingPos) {
|
||||||
|
if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
|
||||||
|
const uint32_t absOffest = abs(offset);
|
||||||
|
const int bigramTargetFieldSize = attributeAddressSize(flags);
|
||||||
|
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
|
||||||
|
writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if the bigram entry is valid and puts the entry flags into outBigramFlags.
|
||||||
|
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
|
||||||
|
const int targetPos, const int probability, const bool hasNext,
|
||||||
|
BigramFlags *const outBigramFlags) {
|
||||||
|
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
||||||
|
if (hasNext) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
||||||
|
}
|
||||||
|
const int targetFieldPos = entryPos + 1;
|
||||||
|
const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0;
|
||||||
|
if (offset < 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
||||||
|
}
|
||||||
|
const uint32_t absOffest = abs(offset);
|
||||||
|
if ((absOffest >> 24) != 0) {
|
||||||
|
// Offset is too large.
|
||||||
|
return false;
|
||||||
|
} else if ((absOffest >> 16) != 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||||
|
} else if ((absOffest >> 8) != 0) {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
||||||
|
} else {
|
||||||
|
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||||
|
}
|
||||||
|
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
|
||||||
|
// writing.
|
||||||
|
// TODO: Remove following 2 lines and optimize memory space.
|
||||||
|
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||||
|
*outBigramFlags = flags;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -24,11 +24,15 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
|
||||||
class BigramListReadWriteUtils {
|
class BigramListReadWriteUtils {
|
||||||
public:
|
public:
|
||||||
typedef uint8_t BigramFlags;
|
typedef uint8_t BigramFlags;
|
||||||
|
|
||||||
static BigramFlags getFlagsAndForwardPointer(const uint8_t *const bigramsBuf, int *const pos);
|
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||||
|
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
||||||
|
int *const bigramEntryPos);
|
||||||
|
|
||||||
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
||||||
return flags & MASK_ATTRIBUTE_PROBABILITY;
|
return flags & MASK_ATTRIBUTE_PROBABILITY;
|
||||||
|
@ -39,10 +43,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bigrams reading methods
|
// Bigrams reading methods
|
||||||
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const pos);
|
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
||||||
|
|
||||||
static int getBigramAddressAndForwardPointer(const uint8_t *const bigramsBuf,
|
|
||||||
const BigramFlags flags, int *const pos);
|
|
||||||
|
|
||||||
// Returns the size of the bigram position field that is stored in bigram flags.
|
// Returns the size of the bigram position field that is stored in bigram flags.
|
||||||
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
||||||
|
@ -67,48 +68,11 @@ public:
|
||||||
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
|
return (flags & (~MASK_ATTRIBUTE_PROBABILITY)) | (probability & MASK_ATTRIBUTE_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if the bigram entry is valid and put entry values into out*.
|
static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
|
||||||
static AK_FORCE_INLINE bool createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
const int targetPos, const int probability, const bool hasNext, int *const writingPos);
|
||||||
const int entryPos, const int targetPos, const int probability, const bool hasNext,
|
|
||||||
BigramFlags *const outBigramFlags, uint32_t *const outOffset,
|
|
||||||
int *const outOffsetFieldSize) {
|
|
||||||
if (targetPos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
|
||||||
if (hasNext) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
|
||||||
}
|
|
||||||
const int targetFieldPos = entryPos + 1;
|
|
||||||
const int offset = targetPos - targetFieldPos;
|
|
||||||
if (offset < 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
|
||||||
}
|
|
||||||
const uint32_t absOffest = abs(offset);
|
|
||||||
if ((absOffest >> 24) != 0) {
|
|
||||||
// Offset is too large.
|
|
||||||
return false;
|
|
||||||
} else if ((absOffest >> 16) != 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
|
||||||
*outOffsetFieldSize = 3;
|
|
||||||
} else if ((absOffest >> 8) != 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
|
||||||
*outOffsetFieldSize = 2;
|
|
||||||
} else {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
|
||||||
*outOffsetFieldSize = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
|
static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
|
||||||
// writing.
|
const int targetOffset, int *const writingPos);
|
||||||
// TODO: Remove following 2 lines and optimize memory space.
|
|
||||||
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
|
||||||
*outOffsetFieldSize = 3;
|
|
||||||
|
|
||||||
*outBigramFlags = flags;
|
|
||||||
*outOffset = absOffest;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||||
|
@ -122,9 +86,16 @@ private:
|
||||||
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
|
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
|
||||||
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
||||||
|
|
||||||
|
// Returns true if the bigram entry is valid and puts the entry flags into outBigramFlags.
|
||||||
|
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
|
||||||
|
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
|
||||||
|
|
||||||
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
||||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||||
|
const BigramFlags flags, int *const pos);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||||
|
|
|
@ -16,41 +16,47 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
|
||||||
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const int DynamicBigramListPolicy::BIGRAM_LINK_COUNT_LIMIT = 10000;
|
const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
|
||||||
|
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
|
||||||
|
|
||||||
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const pos) const {
|
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*bigramEntryPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const BigramListReadWriteUtils::BigramFlags flags =
|
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||||
BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
int originalBigramPos;
|
||||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
|
||||||
buffer, flags, pos);
|
&originalBigramPos, bigramEntryPos);
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
*bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
|
void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
BigramListReadWriteUtils::skipExistingBigrams(buffer, pos);
|
BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
*bigramListPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,13 +67,19 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
|
||||||
*fromPos -= mBuffer->getOriginalBufferSize();
|
*fromPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
*outBigramsCount = 0;
|
*outBigramsCount = 0;
|
||||||
BigramListReadWriteUtils::BigramFlags flags;
|
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||||
|
int bigramEntryCount = 0;
|
||||||
do {
|
do {
|
||||||
|
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||||
|
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||||
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
int originalBigramPos;
|
||||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, fromPos);
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||||
buffer, flags, fromPos);
|
fromPos);
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||||
// skip invalid bigram entry.
|
// skip invalid bigram entry.
|
||||||
continue;
|
continue;
|
||||||
|
@ -76,132 +88,163 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
|
||||||
uint32_t newBigramOffset;
|
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
|
||||||
int newBigramOffsetFieldSize;
|
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
|
||||||
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
|
||||||
*toPos, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(flags),
|
|
||||||
BigramListReadWriteUtils::hasNext(flags), &newBigramFlags, &newBigramOffset,
|
|
||||||
&newBigramOffsetFieldSize)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Write bigram entry. Target buffer is always the additional buffer.
|
|
||||||
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */,toPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!bufferToWrite->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
|
||||||
toPos)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
(*outBigramsCount)++;
|
(*outBigramsCount)++;
|
||||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*fromPos += mBuffer->getOriginalBufferSize();
|
*fromPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramPos,
|
// Finds useless bigram entries and removes them. A bigram entry is useless when the target PtNode
|
||||||
const int probability, int *const pos) {
|
// has been deleted or is not a valid terminal.
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||||
|
int *const bigramListPos) {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
BigramListReadWriteUtils::BigramFlags flags;
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||||
|
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||||
|
int bigramEntryCount = 0;
|
||||||
do {
|
do {
|
||||||
int entryPos = *pos;
|
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||||
|
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||||
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int bigramEntryPos = *bigramListPos;
|
||||||
|
int originalBigramPos;
|
||||||
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
|
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||||
|
bigramListPos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||||
|
// This entry has already been removed.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
const int bigramTargetNodePos =
|
||||||
|
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
|
||||||
|
// TODO: Update probability for supporting probability decaying.
|
||||||
|
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|
||||||
|
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
||||||
|
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
||||||
|
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||||
|
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
|
||||||
|
const int probability, int *const bigramListPos) {
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||||
|
int bigramEntryCount = 0;
|
||||||
|
do {
|
||||||
|
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||||
|
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||||
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int entryPos = *bigramListPos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
entryPos += mBuffer->getOriginalBufferSize();
|
entryPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
int originalBigramPos;
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, pos);
|
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
bigramListPos);
|
||||||
buffer, flags, pos);
|
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramPos) {
|
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
||||||
// Update this bigram entry.
|
// Update this bigram entry.
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||||
BigramListReadWriteUtils::setProbabilityInFlags(flags, probability);
|
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
|
||||||
return mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos);
|
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
||||||
|
originalBigramPos, &entryPos);
|
||||||
}
|
}
|
||||||
if (BigramListReadWriteUtils::hasNext(flags)) {
|
if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// The current last entry is found.
|
// The current last entry is found.
|
||||||
// First, update the flags of the last entry.
|
// First, update the flags of the last entry.
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||||
BigramListReadWriteUtils::setHasNextFlag(flags);
|
BigramListReadWriteUtils::setHasNextFlag(bigramFlags);
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &entryPos)) {
|
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos,
|
||||||
|
&entryPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
*bigramListPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
// Then, add a new entry after the last entry.
|
// Then, add a new entry after the last entry.
|
||||||
return writeNewBigramEntry(bigramPos, probability, pos);
|
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
|
||||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||||
// We return directly from the while loop.
|
// We return directly from the while loop.
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramPos, const int probability,
|
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||||
int *const writingPos) {
|
int *const writingPos) {
|
||||||
BigramListReadWriteUtils::BigramFlags newBigramFlags;
|
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
||||||
uint32_t newBigramOffset;
|
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
||||||
int newBigramOffsetFieldSize;
|
probability, false /* hasNext */, writingPos);
|
||||||
if(!BigramListReadWriteUtils::createBigramEntryAndGetFlagsAndOffsetAndOffsetFieldSize(
|
|
||||||
*writingPos, bigramPos, probability, false /* hasNext */, &newBigramFlags,
|
|
||||||
&newBigramOffset, &newBigramOffsetFieldSize)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write bigram flags.
|
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramFlags, 1 /* size */, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write bigram positon offset.
|
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(newBigramOffset, newBigramOffsetFieldSize,
|
|
||||||
writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int targetBigramPos) {
|
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
||||||
int pos = bigramListPos;
|
int pos = bigramListPos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
BigramListReadWriteUtils::BigramFlags flags;
|
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||||
|
int bigramEntryCount = 0;
|
||||||
do {
|
do {
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||||
flags = BigramListReadWriteUtils::getFlagsAndForwardPointer(buffer, &pos);
|
ASSERT(false);
|
||||||
int bigramOffsetFieldPos = pos;
|
return false;
|
||||||
if (usesAdditionalBuffer) {
|
}
|
||||||
bigramOffsetFieldPos += mBuffer->getOriginalBufferSize();
|
int bigramEntryPos = pos;
|
||||||
|
int originalBigramPos;
|
||||||
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
|
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
|
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
int originalBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
|
|
||||||
buffer, flags, &pos);
|
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
if (bigramPos != targetBigramPos) {
|
if (bigramPos != bigramTargetPos) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Target entry is found. Write 0 into the bigram pos field to mark the bigram invalid.
|
// Target entry is found. Write an invalid target position to mark the bigram invalid.
|
||||||
const int bigramOffsetFieldSize = BigramListReadWriteUtils::attributeAddressSize(flags);
|
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||||
if (!mBuffer->writeUintAndAdvancePosition(0 /* data */, bigramOffsetFieldSize,
|
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
|
||||||
&bigramOffsetFieldPos)) {
|
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,14 +255,14 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
}
|
}
|
||||||
int currentPos = originalBigramPos;
|
int currentPos = originalBigramPos;
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
||||||
int bigramLinkCount = 0;
|
int bigramLinkCount = 0;
|
||||||
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
|
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
|
||||||
currentPos = nodeReader.getBigramLinkedNodePos();
|
currentPos = nodeReader.getBigramLinkedNodePos();
|
||||||
nodeReader.fetchNodeInfoFromBuffer(currentPos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
||||||
bigramLinkCount++;
|
bigramLinkCount++;
|
||||||
if (bigramLinkCount > BIGRAM_LINK_COUNT_LIMIT) {
|
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
|
||||||
AKLOGI("Bigram link is invalid. start position: %d", bigramPos);
|
AKLOGE("Bigram link is invalid. start position: %d", bigramPos);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,13 +21,12 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class DictionaryShortcutsStructurePolicy;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
|
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
|
||||||
*/
|
*/
|
||||||
|
@ -40,9 +39,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const;
|
int *const bigramEntryPos) const;
|
||||||
|
|
||||||
void skipAllBigrams(int *const pos) const;
|
void skipAllBigrams(int *const bigramListPos) const;
|
||||||
|
|
||||||
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
|
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
|
||||||
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
|
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
|
||||||
|
@ -50,18 +49,22 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
|
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
|
||||||
int *const toPos, int *const outBigramsCount) const;
|
int *const toPos, int *const outBigramsCount) const;
|
||||||
|
|
||||||
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
|
||||||
|
|
||||||
bool writeNewBigramEntry(const int bigramPos, const int probability,
|
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
|
||||||
|
int *const bigramListPos);
|
||||||
|
|
||||||
|
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||||
int *const writingPos);
|
int *const writingPos);
|
||||||
|
|
||||||
// Return if targetBigramPos is found or not.
|
// Return if targetBigramPos is found or not.
|
||||||
bool removeBigram(const int bigramListPos, const int targetBigramPos);
|
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||||
|
|
||||||
static const int BIGRAM_LINK_COUNT_LIMIT;
|
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
|
||||||
|
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
@ -68,6 +69,36 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
int mChildrenValue;
|
int mChildrenValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
|
||||||
|
// entries.
|
||||||
|
class ListenerForUpdatingBigramProbability
|
||||||
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
|
public:
|
||||||
|
ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
|
||||||
|
: mBigramPolicy(bigramPolicy) {}
|
||||||
|
|
||||||
|
bool onAscend() { return true; }
|
||||||
|
|
||||||
|
bool onDescend() { return true; }
|
||||||
|
|
||||||
|
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
|
||||||
|
if (!node->isDeleted()) {
|
||||||
|
int pos = node->getBigramsPos();
|
||||||
|
if (pos != NOT_A_DICT_POS) {
|
||||||
|
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
|
||||||
|
|
||||||
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,26 +23,26 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos,
|
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
|
||||||
const int maxCodePointCount, int *const outCodePoints) {
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
||||||
if (nodePos < 0 || nodePos >= mBuffer->getTailPosition()) {
|
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
|
||||||
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
|
AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
|
||||||
nodePos, mBuffer->getTailPosition());
|
ptNodePos, mBuffer->getTailPosition());
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
invalidatePtNodeInfo();
|
invalidatePtNodeInfo();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(nodePos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
int pos = nodePos;
|
int pos = ptNodePos;
|
||||||
mHeadPos = nodePos;
|
mHeadPos = ptNodePos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
const int parentPos =
|
const int parentPos =
|
||||||
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos);
|
||||||
mParentPos = (parentPos != 0) ? nodePos + parentPos : NOT_A_DICT_POS;
|
mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS;
|
||||||
if (outCodePoints != 0) {
|
if (outCodePoints != 0) {
|
||||||
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||||
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
|
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
|
||||||
|
@ -99,7 +99,8 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
||||||
// Read destination node if the read node is a moved node.
|
// Read destination node if the read node is a moved node.
|
||||||
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
||||||
// The destination position is stored at the same place as the parent position.
|
// The destination position is stored at the same place as the parent position.
|
||||||
fetchNodeInfoFromBufferAndProcessMovedNode(mParentPos, maxCodePointCount, outCodePoints);
|
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
|
||||||
|
outCodePoints);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,17 +48,17 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
|
|
||||||
~DynamicPatriciaTrieNodeReader() {}
|
~DynamicPatriciaTrieNodeReader() {}
|
||||||
|
|
||||||
// Reads node information from dictionary buffer and updates members with the information.
|
// Reads PtNode information from dictionary buffer and updates members with the information.
|
||||||
AK_FORCE_INLINE void fetchNodeInfoFromBuffer(const int nodePos) {
|
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
|
||||||
fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos , 0 /* maxCodePointCount */,
|
fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
|
||||||
0 /* outCodePoints */);
|
0 /* maxCodePointCount */, 0 /* outCodePoints */);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchNodeInfoFromBufferAndGetNodeCodePoints(const int nodePos,
|
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
|
||||||
const int maxCodePointCount, int *const outCodePoints) {
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
||||||
mSiblingPos = NOT_A_DICT_POS;
|
mSiblingPos = NOT_A_DICT_POS;
|
||||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
||||||
fetchNodeInfoFromBufferAndProcessMovedNode(nodePos, maxCodePointCount, outCodePoints);
|
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
||||||
|
@ -154,8 +154,8 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
int mBigramPos;
|
int mBigramPos;
|
||||||
int mSiblingPos;
|
int mSiblingPos;
|
||||||
|
|
||||||
void fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount,
|
void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||||
int *const outCodePoints);
|
const int maxCodePointCount, int *const outCodePoints);
|
||||||
|
|
||||||
void invalidatePtNodeInfo();
|
void invalidatePtNodeInfo();
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,7 +35,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPos());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
||||||
|
@ -48,7 +48,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||||
// node code points are stored in the buffer in the reverse order.
|
// node code points are stored in the buffer in the reverse order.
|
||||||
|
@ -56,9 +56,9 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
// First, read the terminal node and get its probability.
|
// First, read the terminal node and get its probability.
|
||||||
readingHelper.initWithNodePos(nodePos);
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
if (!readingHelper.isValidTerminalNode()) {
|
if (!readingHelper.isValidTerminalNode()) {
|
||||||
// Node at the nodePos is not a valid terminal node.
|
// Node at the ptNodePos is not a valid terminal node.
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
// Then, following parent node link to the dictionary root and fetch node code points.
|
// Then, following parent node link to the dictionary root and fetch node code points.
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
||||||
// The nodePos is not a valid terminal node position in the dictionary.
|
// The ptNodePos is not a valid terminal node position in the dictionary.
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -98,7 +98,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
||||||
|
@ -148,39 +148,39 @@ int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
return nodeReader.getShortcutPos();
|
return nodeReader.getShortcutPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
@ -195,7 +195,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy);
|
||||||
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
|
return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
|
||||||
|
|
|
@ -51,7 +51,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalNodePositionOfWord(const int *const inWord,
|
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
|
@ -59,11 +59,11 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
int getUnigramProbabilityOfPtNode(const int nodePos) const;
|
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
int getShortcutPositionOfNode(const int nodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
int getBigramsPositionOfNode(const int nodePos) const;
|
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||||
return &mHeaderPolicy;
|
return &mHeaderPolicy;
|
||||||
|
|
|
@ -72,7 +72,7 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
||||||
|
|
||||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||||
// method to avoid an infinite loop.
|
// method to avoid an infinite loop.
|
||||||
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
@ -123,7 +123,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||||
// Follow the forward link.
|
// Follow the forward link.
|
||||||
mReadingState.mPos += forwardLinkPosition;
|
mReadingState.mPos += forwardLinkPosition;
|
||||||
nextNodeArray();
|
nextPtNodeArray();
|
||||||
} else {
|
} else {
|
||||||
// All node arrays have been read.
|
// All node arrays have been read.
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
|
|
|
@ -73,32 +73,32 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
return mReadingState.mPos == NOT_A_DICT_POS;
|
return mReadingState.mPos == NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize reading state with the head position of a node array.
|
// Initialize reading state with the head position of a PtNode array.
|
||||||
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
|
||||||
if (nodeArrayPos == NOT_A_DICT_POS) {
|
if (ptNodeArrayPos == NOT_A_DICT_POS) {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mReadingState.mPos = nodeArrayPos;
|
mReadingState.mPos = ptNodeArrayPos;
|
||||||
mReadingState.mPrevTotalCodePointCount = 0;
|
mReadingState.mPrevTotalCodePointCount = 0;
|
||||||
mReadingState.mTotalNodeCount = 0;
|
mReadingState.mTotalNodeCount = 0;
|
||||||
mReadingState.mNodeArrayCount = 0;
|
mReadingState.mNodeArrayCount = 0;
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
nextNodeArray();
|
nextPtNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize reading state with the head position of a node.
|
// Initialize reading state with the head position of a node.
|
||||||
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mReadingState.mPos = nodePos;
|
mReadingState.mPos = ptNodePos;
|
||||||
mReadingState.mNodeCount = 1;
|
mReadingState.mNodeCount = 1;
|
||||||
mReadingState.mPrevTotalCodePointCount = 0;
|
mReadingState.mPrevTotalCodePointCount = 0;
|
||||||
mReadingState.mTotalNodeCount = 1;
|
mReadingState.mTotalNodeCount = 1;
|
||||||
|
@ -106,7 +106,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,10 +151,10 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
// All nodes in the current node array have been read.
|
// All nodes in the current node array have been read.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,9 +167,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mReadingState.mPos = mNodeReader.getChildrenPos();
|
mReadingState.mPos = mNodeReader.getChildrenPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
// Read children node array.
|
// Read children node array.
|
||||||
nextNodeArray();
|
nextPtNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
|
@ -186,7 +186,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mReadingState.mPos = mNodeReader.getParentPos();
|
mReadingState.mPos = mNodeReader.getParentPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
} else {
|
} else {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
@ -202,7 +202,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
|
|
||||||
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,12 +240,12 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
std::vector<ReadingState> mReadingStateStack;
|
std::vector<ReadingState> mReadingStateStack;
|
||||||
|
|
||||||
void nextNodeArray();
|
void nextPtNodeArray();
|
||||||
|
|
||||||
void followForwardLink();
|
void followForwardLink();
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchNodeInfo() {
|
AK_FORCE_INLINE void fetchPtNodeInfo() {
|
||||||
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
|
mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
|
||||||
MAX_WORD_LENGTH, mMergedNodeCodePoints);
|
MAX_WORD_LENGTH, mMergedNodeCodePoints);
|
||||||
if (mNodeReader.getCodePointCount() <= 0) {
|
if (mNodeReader.getCodePointCount() <= 0) {
|
||||||
// Empty node is not allowed.
|
// Empty node is not allowed.
|
||||||
|
@ -271,7 +271,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
} else {
|
} else {
|
||||||
mReadingState = mReadingStateStack.back();
|
mReadingState = mReadingStateStack.back();
|
||||||
mReadingStateStack.pop_back();
|
mReadingStateStack.pop_back();
|
||||||
fetchNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -90,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
const int probability) {
|
const int probability) {
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
||||||
mMergedNodeCodePoints);
|
mMergedNodeCodePoints);
|
||||||
// Move node to add bigram entry.
|
// Move node to add bigram entry.
|
||||||
const int newNodePos = mBuffer->getTailPosition();
|
const int newNodePos = mBuffer->getTailPosition();
|
||||||
|
@ -104,7 +104,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
nodeReader.fetchNodeInfoFromBuffer(newNodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
|
||||||
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
|
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
|
||||||
// Insert a new bigram entry into the existing bigram list.
|
// Insert a new bigram entry into the existing bigram list.
|
||||||
int bigramListPos = nodeReader.getBigramsPos();
|
int bigramListPos = nodeReader.getBigramsPos();
|
||||||
|
@ -131,7 +131,7 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
|
||||||
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -217,7 +217,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
// Update children's parent position.
|
// Update children's parent position.
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
readingHelper.initWithNodeArrayPos(originalNode->getChildrenPos());
|
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
|
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
|
||||||
const int parentOffset = movedPos - childPtNodeWrittenPos;
|
const int parentOffset = movedPos - childPtNodeWrittenPos;
|
||||||
|
@ -452,7 +452,7 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
// Load node info. Information of the 1st part will be fetched.
|
// Load node info. Information of the 1st part will be fetched.
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(firstPartOfReallocatedPtNodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
|
||||||
// Update children position.
|
// Update children position.
|
||||||
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
|
@ -519,7 +519,7 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite) {
|
BufferWithExtendableBuffer *const bufferToWrite) {
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||||
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||||
|
@ -528,6 +528,14 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
|
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
|
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
|
||||||
|
listenerForupdatingBigramProbability(mBigramPolicy);
|
||||||
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
|
&listenerForupdatingBigramProbability)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// TODO: Implement.
|
// TODO: Implement.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
// with a z, it's the last PtNode of the root array, so all children addresses will be smaller
|
||||||
// than the position we look for, and we have to descend the z node).
|
// than the position we look for, and we have to descend the z node).
|
||||||
/* Parameters :
|
/* Parameters :
|
||||||
* nodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
* ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
|
||||||
* what is stored as the "bigram position" in each bigram)
|
* what is stored as the "bigram position" in each bigram)
|
||||||
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
|
* outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
|
||||||
* outUnigramProbability: a pointer to an int to write the probability into.
|
* outUnigramProbability: a pointer to an int to write the probability into.
|
||||||
|
@ -57,7 +57,7 @@ void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
*/
|
*/
|
||||||
// TODO: Split this function to be more readable
|
// TODO: Split this function to be more readable
|
||||||
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
int pos = getRootPosition();
|
int pos = getRootPosition();
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
|
@ -78,7 +78,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
|
const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
|
||||||
mDictRoot, &pos);
|
mDictRoot, &pos);
|
||||||
if (nodePos == startPos) {
|
if (ptNodePos == startPos) {
|
||||||
// We found the position. Copy the rest of the code points in the buffer and return
|
// We found the position. Copy the rest of the code points in the buffer and return
|
||||||
// the length.
|
// the length.
|
||||||
outCodePoints[wordPos] = character;
|
outCodePoints[wordPos] = character;
|
||||||
|
@ -121,7 +121,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
// Here comes the tricky part. First, read the children position.
|
// Here comes the tricky part. First, read the children position.
|
||||||
const int childrenPos = PatriciaTrieReadingUtils
|
const int childrenPos = PatriciaTrieReadingUtils
|
||||||
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
|
::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
|
||||||
if (childrenPos > nodePos) {
|
if (childrenPos > ptNodePos) {
|
||||||
// If the children pos is greater than the position, it means the previous
|
// If the children pos is greater than the position, it means the previous
|
||||||
// PtNode, which position is stored in lastCandidatePtNodePos, was the right
|
// PtNode, which position is stored in lastCandidatePtNodePos, was the right
|
||||||
// one.
|
// one.
|
||||||
|
@ -213,7 +213,7 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If we have looked through all the PtNodes and found no match, the nodePos is
|
// If we have looked through all the PtNodes and found no match, the ptNodePos is
|
||||||
// not the position of a terminal in this dictionary.
|
// not the position of a terminal in this dictionary.
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -319,11 +319,11 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
int pos = nodePos;
|
int pos = ptNodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
|
@ -341,11 +341,11 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int nodePos) const {
|
||||||
mDictRoot, &pos), NOT_A_PROBABILITY);
|
mDictRoot, &pos), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
int pos = nodePos;
|
int pos = ptNodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||||
|
@ -361,11 +361,11 @@ int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
int pos = nodePos;
|
int pos = ptNodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
|
@ -385,8 +385,8 @@ int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
||||||
const int nodePos, DicNodeVector *childDicNodes) const {
|
const int ptNodePos, DicNodeVector *childDicNodes) const {
|
||||||
int pos = nodePos;
|
int pos = ptNodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
@ -404,7 +404,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
getBigramsStructurePolicy()->skipAllBigrams(&pos);
|
getBigramsStructurePolicy()->skipAllBigrams(&pos);
|
||||||
}
|
}
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
|
||||||
PatriciaTrieReadingUtils::isTerminal(flags),
|
PatriciaTrieReadingUtils::isTerminal(flags),
|
||||||
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
|
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
|
||||||
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
|
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
|
||||||
|
|
|
@ -58,11 +58,11 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
int getUnigramProbabilityOfPtNode(const int nodePos) const;
|
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
int getShortcutPositionOfNode(const int nodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
int getBigramsPositionOfNode(const int nodePos) const;
|
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||||
return &mHeaderPolicy;
|
return &mHeaderPolicy;
|
||||||
|
@ -121,7 +121,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const BigramListPolicy mBigramListPolicy;
|
const BigramListPolicy mBigramListPolicy;
|
||||||
const ShortcutListPolicy mShortcutListPolicy;
|
const ShortcutListPolicy mShortcutListPolicy;
|
||||||
|
|
||||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
Loading…
Reference in New Issue