am 5e797bd7
: Merge "Implement update node probability method."
* commit '5e797bd7e29861bf60087b27634dc249bf0c70bc': Implement update node probability method.
This commit is contained in:
commit
ecdde8ee85
4 changed files with 57 additions and 11 deletions
|
@ -43,8 +43,13 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
||||||
dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
|
dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
|
||||||
}
|
}
|
||||||
if (isTerminal()) {
|
if (isTerminal()) {
|
||||||
|
mProbabilityFieldPos = pos;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
|
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
|
||||||
} else {
|
} else {
|
||||||
|
mProbabilityFieldPos = NOT_A_DICT_POS;
|
||||||
mProbability = NOT_A_PROBABILITY;
|
mProbability = NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||||
|
|
|
@ -40,7 +40,8 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
||||||
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
|
||||||
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY),
|
mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
|
||||||
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
||||||
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
||||||
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
||||||
|
|
||||||
|
@ -95,6 +96,10 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Probability
|
// Probability
|
||||||
|
AK_FORCE_INLINE int getProbabilityFieldPos() const {
|
||||||
|
return mProbabilityFieldPos;
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getProbability() const {
|
AK_FORCE_INLINE int getProbability() const {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
@ -129,6 +134,7 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
||||||
int mParentPos;
|
int mParentPos;
|
||||||
uint8_t mCodePointCount;
|
uint8_t mCodePointCount;
|
||||||
|
int mProbabilityFieldPos;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mChildrenPos;
|
int mChildrenPos;
|
||||||
int mShortcutPos;
|
int mShortcutPos;
|
||||||
|
|
|
@ -26,6 +26,9 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
// TODO: Enable dynamic update and remove this flag.
|
||||||
|
const bool DynamicPatriciaTrieWritingHelper::ENABLE_DYNAMIC_UPDATE = false;
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
const int *const wordCodePoints, const int codePointCount, const int probability) {
|
||||||
|
@ -56,12 +59,12 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount()) {
|
if (codePointCount == readingHelper->getTotalCodePointCount()) {
|
||||||
if (nodeReader->isTerminal()) {
|
if (ENABLE_DYNAMIC_UPDATE) {
|
||||||
// TODO: Update probability.
|
setPtNodeProbability(nodeReader, probability,
|
||||||
|
readingHelper->getMergedNodeCodePoints());
|
||||||
} else {
|
} else {
|
||||||
// TODO: Make it terminal and update probability.
|
return false;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
if (!nodeReader->hasChildren()) {
|
if (!nodeReader->hasChildren()) {
|
||||||
// TODO: Create children node array and add new node as a child.
|
// TODO: Create children node array and add new node as a child.
|
||||||
|
@ -76,12 +79,14 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int pos = readingHelper->getPosOfLastForwardLinkField();
|
int pos = readingHelper->getPosOfLastForwardLinkField();
|
||||||
// TODO: Remove.
|
if (ENABLE_DYNAMIC_UPDATE) {
|
||||||
return false;
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
probability, &pos);
|
||||||
probability, &pos);
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
@ -214,4 +219,30 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
|
const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
|
||||||
|
const int *const codePoints) {
|
||||||
|
if (originalPtNode->isTerminal()) {
|
||||||
|
// Overwrites the probability.
|
||||||
|
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
|
probability, &probabilityFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Make the node terminal and write the probability.
|
||||||
|
int movedPos = mBuffer->getTailPosition();
|
||||||
|
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(),
|
||||||
|
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
|
||||||
|
probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(),
|
||||||
|
originalPtNode->getShortcutPos(), &movedPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -49,6 +49,7 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
|
static const bool ENABLE_DYNAMIC_UPDATE;
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
@ -63,6 +64,9 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
|
bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
|
||||||
|
const int probability, const int *const codePoints);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
||||||
|
|
Loading…
Reference in a new issue