am 3fbc5ef1: Implement inserting new node into PtNode array.

* commit '3fbc5ef196bbe20b02be2ff11768e00a4f16ff4c':
  Implement inserting new node into PtNode array.
This commit is contained in:
Keisuke Kuroyanagi 2013-09-06 04:49:16 -07:00 committed by Android Git Automerger
commit b41d66d718
4 changed files with 142 additions and 3 deletions

View file

@ -70,6 +70,7 @@ void DynamicPatriciaTrieReadingHelper::followForwardLink() {
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos += mBuffer->getOriginalBufferSize(); mPos += mBuffer->getOriginalBufferSize();
} }
mPosOfLastForwardLinkField = mPos;
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link. // Follow the forward link.
mPos += forwardLinkPosition; mPos += forwardLinkPosition;

View file

@ -38,8 +38,8 @@ class DynamicPatriciaTrieReadingHelper {
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), : mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
mTotalNodeCount(0), mNodeArrayCount(0), mBuffer(buffer), mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {} mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
~DynamicPatriciaTrieReadingHelper() {} ~DynamicPatriciaTrieReadingHelper() {}
@ -62,6 +62,7 @@ class DynamicPatriciaTrieReadingHelper {
mPrevTotalCodePointCount = 0; mPrevTotalCodePointCount = 0;
mTotalNodeCount = 0; mTotalNodeCount = 0;
mNodeArrayCount = 0; mNodeArrayCount = 0;
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
nextNodeArray(); nextNodeArray();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchNodeInfo();
@ -81,6 +82,7 @@ class DynamicPatriciaTrieReadingHelper {
mPrevTotalCodePointCount = 0; mPrevTotalCodePointCount = 0;
mTotalNodeCount = 1; mTotalNodeCount = 1;
mNodeArrayCount = 1; mNodeArrayCount = 1;
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
fetchNodeInfo(); fetchNodeInfo();
} }
} }
@ -140,6 +142,7 @@ class DynamicPatriciaTrieReadingHelper {
mTotalNodeCount = 0; mTotalNodeCount = 0;
mNodeArrayCount = 0; mNodeArrayCount = 0;
mPos = mNodeReader.getChildrenPos(); mPos = mNodeReader.getChildrenPos();
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array. // Read children node array.
nextNodeArray(); nextNodeArray();
if (!isEnd()) { if (!isEnd()) {
@ -158,12 +161,17 @@ class DynamicPatriciaTrieReadingHelper {
mNodeArrayCount = 1; mNodeArrayCount = 1;
mNodeCount = 1; mNodeCount = 1;
mPos = mNodeReader.getParentPos(); mPos = mNodeReader.getParentPos();
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
fetchNodeInfo(); fetchNodeInfo();
} else { } else {
mPos = NOT_A_DICT_POS; mPos = NOT_A_DICT_POS;
} }
} }
// Returns the position recorded by followForwardLink() for the most recently visited forward
// link field. The value is NOT_A_DICT_POS after the helper has been (re)initialized or moved
// to a child/parent node and no forward link field has been visited since.
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
return mPosOfLastForwardLinkField;
}
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
@ -177,6 +185,7 @@ class DynamicPatriciaTrieReadingHelper {
int mPrevTotalCodePointCount; int mPrevTotalCodePointCount;
int mTotalNodeCount; int mTotalNodeCount;
int mNodeArrayCount; int mNodeArrayCount;
int mPosOfLastForwardLinkField;
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
DynamicPatriciaTrieNodeReader mNodeReader; DynamicPatriciaTrieNodeReader mNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH]; int mMergedNodeCodePoints[MAX_WORD_LENGTH];

View file

@ -19,7 +19,9 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
namespace latinime { namespace latinime {
@ -27,6 +29,7 @@ namespace latinime {
bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper, DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability) { const int *const wordCodePoints, const int codePointCount, const int probability) {
int parentPos = NOT_A_VALID_WORD_POS;
while (!readingHelper->isEnd()) { while (!readingHelper->isEnd()) {
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
if (!readingHelper->isMatchedCodePoint(0 /* index */, if (!readingHelper->isMatchedCodePoint(0 /* index */,
@ -65,14 +68,20 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
return false; return false;
} }
// Advance to the children nodes. // Advance to the children nodes.
parentPos = nodeReader->getNodePos();
readingHelper->readChildNode(); readingHelper->readChildNode();
} }
if (readingHelper->isError()) { if (readingHelper->isError()) {
// The dictionary is invalid. // The dictionary is invalid.
return false; return false;
} }
// TODO: add at the last position of the node array. int pos = readingHelper->getPosOfLastForwardLinkField();
// TODO: Remove.
return false; return false;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(),
probability, &pos);
} }
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -97,4 +106,112 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
return false; return false;
} }
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos) {
int pos = originalNode->getNodePos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
int writingPos = originalNode->getNodePos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent position field.
if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(
mBuffer, movedPos, &writingPos)) {
return false;
}
return true;
}
// Writes a new PtNode into mBuffer starting at *writingPos.
//
// Fields are emitted in this order: flags, parent position, code points, probability (only
// when probability != NOT_A_PROBABILITY), children position, then copies of the shortcut list
// and the bigram list when their original positions are not NOT_A_DICT_POS.
//
// writingPos is an in/out parameter that is advanced by every write/copy helper called here.
// Returns false as soon as one of the checked writes fails; true otherwise.
bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted,
        const bool isNotAWord, const int parentPos, const int *const codePoints,
        const int codePointCount, const int probability, const int childrenPos,
        const int originalBigramListPos, const int originalShortcutListPos,
        int *const writingPos) {
    // A valid probability means this node is terminal.
    const bool isTerminal = probability != NOT_A_PROBABILITY;
    const bool hasShortcutList = originalShortcutListPos != NOT_A_DICT_POS;
    const bool hasBigramList = originalBigramListPos != NOT_A_DICT_POS;
    const bool hasMultipleCodePoints = codePointCount > 1;

    // Create node flags and write them.
    const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
            PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
                    hasShortcutList, hasBigramList, hasMultipleCodePoints,
                    3 /* childrenPositionFieldSize */);
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
            writingPos)) {
        return false;
    }
    // Write parent position.
    if (!DynamicPatriciaTrieWritingUtils::writeParentPositionAndAdvancePosition(mBuffer,
            parentPos, writingPos)) {
        return false;
    }
    // Write code points.
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
            codePointCount, writingPos)) {
        return false;
    }
    // Write probability only for terminal nodes.
    if (isTerminal) {
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
                probability, writingPos)) {
            return false;
        }
    }
    // Write children position.
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
            childrenPos, writingPos)) {
        return false;
    }
    // Copy shortcut list when the originalShortcutListPos is a valid dictionary position.
    if (hasShortcutList) {
        int fromPos = originalShortcutListPos;
        // NOTE(review): the result of copyAllShortcuts() is not checked, unlike
        // copyAllBigrams() below. Confirm whether it can report failure.
        mShortcutPolicy->copyAllShortcuts(&fromPos, writingPos);
    }
    // Copy bigram list when the originalBigramListPos is a valid dictionary position.
    if (hasBigramList) {
        int fromPos = originalBigramListPos;
        if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
            return false;
        }
    }
    return true;
}
// Creates a new single-node PtNode array at the current tail of mBuffer and links it from the
// forward link field located at *forwardLinkFieldPos.
//
// The forward link field is written first (advancing *forwardLinkFieldPos), then the new
// array is emitted at the tail: its size (1), the node itself with no children, bigrams or
// shortcuts, and finally a forward link field holding NOT_A_DICT_POS.
// Returns false as soon as any of these writes fails.
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
        const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
        int *const forwardLinkFieldPos) {
    // Capture the tail before anything is written; this is where the new array will start.
    const int newPtNodeArrayPos = mBuffer->getTailPosition();
    const bool isLinked =
            DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
                    newPtNodeArrayPos, forwardLinkFieldPos);
    if (!isLinked) {
        return false;
    }

    // Emit the array contents; short-circuit evaluation stops at the first failing write.
    int writingPos = newPtNodeArrayPos;
    return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
                   1 /* arraySize */, &writingPos)
            && writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPos,
                   nodeCodePoints, nodeCodePointCount, probability,
                   NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
                   NOT_A_DICT_POS /* originalShortcutPos */, &writingPos)
            && DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
                   mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos);
}
} // namespace latinime } // namespace latinime

View file

@ -23,6 +23,7 @@ namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class DynamicBigramListPolicy; class DynamicBigramListPolicy;
class DynamicPatriciaTrieNodeReader;
class DynamicPatriciaTrieReadingHelper; class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy; class DynamicShortcutListPolicy;
@ -51,6 +52,17 @@ class DynamicPatriciaTrieWritingHelper {
BufferWithExtendableBuffer *const mBuffer; BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy; DynamicShortcutListPolicy *const mShortcutPolicy;
// Sets the moved flag on the PtNode read by nodeToUpdate and writes movedPos into the node's
// parent position field. Returns false if writing to the buffer fails.
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos);
// Writes a new PtNode at *writingPos: flags, parent position, code points, probability (only
// when it is not NOT_A_PROBABILITY), children position, then copies of the shortcut and bigram
// lists whose original positions are not NOT_A_DICT_POS. writingPos is advanced as fields are
// written. Returns false if a checked write fails.
bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
const int childrenPos, const int originalBigramListPos,
const int originalShortcutListPos, int *const writingPos);
// Appends a new PtNode array holding a single node at the tail of the buffer and writes its
// position into the forward link field at *forwardLinkFieldPos, which is advanced by that
// write. Returns false if any write fails.
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */