Implement update node probability method.

Bug: 6669677
Change-Id: I61ac0d05e362fc7d8a967ddd8286580998c70487
main
Keisuke Kuroyanagi 2013-09-09 15:29:11 +09:00
parent 37e0fd2ff0
commit 7bd7dc5d0d
4 changed files with 57 additions and 11 deletions

View File

@ -43,8 +43,13 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
dictBuf, mFlags, MAX_WORD_LENGTH, &pos); dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
} }
if (isTerminal()) { if (isTerminal()) {
mProbabilityFieldPos = pos;
if (usesAdditionalBuffer) {
mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
}
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
} else { } else {
mProbabilityFieldPos = NOT_A_DICT_POS;
mProbability = NOT_A_PROBABILITY; mProbability = NOT_A_PROBABILITY;
} }
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(

View File

@ -40,7 +40,8 @@ class DynamicPatriciaTrieNodeReader {
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy), : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0),
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {} mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_VALID_WORD_POS) {}
@ -95,6 +96,10 @@ class DynamicPatriciaTrieNodeReader {
} }
// Probability // Probability
// Returns the position of this node's probability field as recorded by the most recent
// node-info fetch. For a terminal node this is the read position at which the probability
// was found, offset by the original buffer size when the node came from the additional
// buffer. It is NOT_A_DICT_POS for a non-terminal node or before any node has been fetched.
AK_FORCE_INLINE int getProbabilityFieldPos() const {
return mProbabilityFieldPos;
}
AK_FORCE_INLINE int getProbability() const { AK_FORCE_INLINE int getProbability() const {
return mProbability; return mProbability;
} }
@ -129,6 +134,7 @@ class DynamicPatriciaTrieNodeReader {
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos; int mParentPos;
uint8_t mCodePointCount; uint8_t mCodePointCount;
int mProbabilityFieldPos;
int mProbability; int mProbability;
int mChildrenPos; int mChildrenPos;
int mShortcutPos; int mShortcutPos;

View File

@ -26,6 +26,9 @@
namespace latinime { namespace latinime {
// TODO: Enable dynamic update and remove this flag.
// While this flag is false, addUnigramWord() returns false at the points where it would
// otherwise call setPtNodeProbability() or createAndInsertNodeIntoPtNodeArray(), so the
// dictionary buffer is not modified through those paths.
const bool DynamicPatriciaTrieWritingHelper::ENABLE_DYNAMIC_UPDATE = false;
bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper, DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability) { const int *const wordCodePoints, const int codePointCount, const int probability) {
@ -56,12 +59,12 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
} }
// All characters are matched. // All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount()) { if (codePointCount == readingHelper->getTotalCodePointCount()) {
if (nodeReader->isTerminal()) { if (ENABLE_DYNAMIC_UPDATE) {
// TODO: Update probability. setPtNodeProbability(nodeReader, probability,
readingHelper->getMergedNodeCodePoints());
} else { } else {
// TODO: Make it terminal and update probability. return false;
} }
return false;
} }
if (!nodeReader->hasChildren()) { if (!nodeReader->hasChildren()) {
// TODO: Create children node array and add new node as a child. // TODO: Create children node array and add new node as a child.
@ -76,12 +79,14 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
return false; return false;
} }
int pos = readingHelper->getPosOfLastForwardLinkField(); int pos = readingHelper->getPosOfLastForwardLinkField();
// TODO: Remove. if (ENABLE_DYNAMIC_UPDATE) {
return false; return createAndInsertNodeIntoPtNodeArray(parentPos,
return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, &pos);
probability, &pos); } else {
return false;
}
} }
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -214,4 +219,30 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const
return true; return true;
} }
// Sets the probability of the given PtNode.
//
// originalPtNode: reader holding the info of the node to update.
// probability:    the probability value to store.
// codePoints:     code points passed to writeNodeToBuffer() when the node is rewritten;
//                 originalPtNode->getCodePointCount() of them are used.
//
// Returns true when every buffer write succeeded, false as soon as one fails.
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
        const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
        const int *const codePoints) {
    if (!originalPtNode->isTerminal()) {
        // The node has no probability field yet. Mark it as moved to the current tail of
        // the buffer and write a terminal copy there that carries over the original flags,
        // parent, children, bigram and shortcut positions.
        int writingPos = mBuffer->getTailPosition();
        if (!markNodeAsMovedAndSetPosition(originalPtNode, writingPos)) {
            return false;
        }
        return writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(),
                originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
                probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(),
                originalPtNode->getShortcutPos(), &writingPos);
    }
    // Terminal node: the probability field already exists, so overwrite it in place.
    int fieldPos = originalPtNode->getProbabilityFieldPos();
    return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
            probability, &fieldPos);
}
} // namespace latinime } // namespace latinime

View File

@ -49,6 +49,7 @@ class DynamicPatriciaTrieWritingHelper {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
static const bool ENABLE_DYNAMIC_UPDATE;
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy; DynamicShortcutListPolicy *const mShortcutPolicy;
@ -63,6 +64,9 @@ class DynamicPatriciaTrieWritingHelper {
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
// Sets the probability of the node described by originalNode. A terminal node has its
// probability field overwritten in place; a non-terminal node is marked as moved and
// rewritten at the buffer tail using codePoints. Returns false if a write fails.
bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
const int probability, const int *const codePoints);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */