GC step 3. Place and write valid PtNodes.

Bug: 6669677

Change-Id: Id2b7359bafb34acf12dc3bf4c595a03073fda805
main
Keisuke Kuroyanagi 2013-09-20 21:11:49 +09:00
parent 7a8dabc3e1
commit 0355bf2d4c
6 changed files with 199 additions and 15 deletions

View File

@ -20,7 +20,8 @@ namespace latinime {
bool DynamicPatriciaTrieGcEventListeners bool DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children. // children.
bool isUselessPtNode = !node->isTerminal(); bool isUselessPtNode = !node->isTerminal();
@ -45,4 +46,53 @@ bool DynamicPatriciaTrieGcEventListeners
return true; return true;
} }
// Writes dummy PtNode array size when the head of PtNode array is read.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) {
mValidPtNodeCount = 0;
int writingPos = mBufferToWrite->getTailPosition();
mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos));
// Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
// This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
mPtNodeArraySizeFieldPos = writingPos;
return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
mBufferToWrite, 0 /* arraySize */, &writingPos);
}
// Write PtNode array terminal and actual PtNode array size.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
::onReadingPtNodeArrayTail() {
int writingPos = mBufferToWrite->getTailPosition();
// Write PtNode array terminal.
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
// Write actual PtNode array size.
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
return false;
}
return true;
}
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (node->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted.
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(),
NOT_A_DICT_POS));
return true;
}
int writingPos = mBufferToWrite->getTailPosition();
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos));
mValidPtNodeCount++;
// Writes current PtNode.
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
node->getProbability(), &writingPos);
}
} // namespace latinime } // namespace latinime

View File

@ -24,6 +24,8 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "utils/hash_map_compat.h"
namespace latinime { namespace latinime {
@ -52,12 +54,15 @@ class DynamicPatriciaTrieGcEventListeners {
return true; return true;
} }
bool onDescend() { bool onDescend(const int ptNodeArrayPos) {
valueStack.push_back(0); valueStack.push_back(0);
return true; return true;
} }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node); bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS( DISALLOW_IMPLICIT_CONSTRUCTORS(
@ -79,9 +84,12 @@ class DynamicPatriciaTrieGcEventListeners {
bool onAscend() { return true; } bool onAscend() { return true; }
bool onDescend() { return true; } bool onDescend(const int ptNodeArrayPos) { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (!node->isDeleted()) { if (!node->isDeleted()) {
int pos = node->getBigramsPos(); int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) { if (pos != NOT_A_DICT_POS) {
@ -99,6 +107,40 @@ class DynamicPatriciaTrieGcEventListeners {
DynamicBigramListPolicy *const mBigramPolicy; DynamicBigramListPolicy *const mBigramPolicy;
}; };
class ListenerForPlacingAndWritingValidPtNodesToBuffer
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForPlacingAndWritingValidPtNodesToBuffer(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const bufferToWrite,
hash_map_compat<int, int> *const positionMap)
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
mPositionMap(positionMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
bool onAscend() { return true; }
bool onDescend(const int ptNodeArrayPos);
bool onReadingPtNodeArrayTail();
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
hash_map_compat<int, int> *getPositionMap() const {
return mPositionMap;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBufferToWrite;
hash_map_compat<int, int> *const mPositionMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
}; };

View File

@ -25,18 +25,22 @@ const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LO
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
// Visits all PtNodes in post-order depth first manner.
// For example, visits c -> b -> y -> x -> a for the following dictionary:
// a _ b _ c
// \ x _ y
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
TraversingEventListener *const listener) { TraversingEventListener *const listener) {
bool alreadyVisitedChildren = false; bool alreadyVisitedChildren = false;
// Descend from the root to the root PtNode array. // Descend from the root to the root PtNode array.
if (!listener->onDescend()) { if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
return false; return false;
} }
while (!isEnd()) { while (!isEnd()) {
if (!alreadyVisitedChildren) { if (!alreadyVisitedChildren) {
if (mNodeReader.hasChildren()) { if (mNodeReader.hasChildren()) {
// Move to the first child. // Move to the first child.
if (!listener->onDescend()) { if (!listener->onDescend(mNodeReader.getChildrenPos())) {
return false; return false;
} }
pushReadingStateToStack(); pushReadingStateToStack();
@ -45,13 +49,16 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
alreadyVisitedChildren = true; alreadyVisitedChildren = true;
} }
} else { } else {
if (!listener->onVisitingPtNode(&mNodeReader)) { if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
return false; return false;
} }
readNextSiblingNode(); readNextSiblingNode();
if (isEnd()) { if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited. // All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent. // Return to the parent.
if (!listener->onReadingPtNodeArrayTail()) {
return false;
}
if (!listener->onAscend()) { if (!listener->onAscend()) {
return false; return false;
} }
@ -70,6 +77,75 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
return !isError(); return !isError();
} }
// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
// that PtNodes are written in the dictionary buffer.
// For example, visits a -> b -> x -> c -> y for the following dictionary:
// a _ b _ c
// \ x _ y
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedAllPtNodesInArray = false;
bool alreadyVisitedChildren = false;
// Descend from the root to the root PtNode array.
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
return false;
}
pushReadingStateToStack();
while (!isEnd()) {
if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children.
readNextSiblingNode();
if (isEnd()) {
// Return to the parent PTNode.
if (!listener->onAscend()) {
return false;
}
popReadingStateFromStack();
alreadyVisitedChildren = true;
alreadyVisitedAllPtNodesInArray = true;
} else {
alreadyVisitedChildren = false;
}
} else {
if (mNodeReader.hasChildren()) {
// Move to the first child.
if (!listener->onDescend(mNodeReader.getChildrenPos())) {
return false;
}
pushReadingStateToStack();
readChildNode();
// Push state to return the head of PtNode array.
pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false;
alreadyVisitedChildren = false;
} else {
alreadyVisitedChildren = true;
}
}
} else {
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
return false;
}
readNextSiblingNode();
if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) {
return false;
}
// Return to the head of current PtNode array.
popReadingStateFromStack();
alreadyVisitedAllPtNodesInArray = true;
}
}
}
popReadingStateFromStack();
// Ascend from the root PtNode array to the root.
if (!listener->onAscend()) {
return false;
}
return !isError();
}
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this // Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop. // method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {

View File

@ -45,10 +45,14 @@ class DynamicPatriciaTrieReadingHelper {
virtual bool onAscend() = 0; virtual bool onAscend() = 0;
// Returns whether the event handling was succeeded or not. // Returns whether the event handling was succeeded or not.
virtual bool onDescend() = 0; virtual bool onDescend(const int ptNodeArrayPos) = 0;
// Returns whether the event handling was succeeded or not. // Returns whether the event handling was succeeded or not.
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0; virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not.
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) = 0;
protected: protected:
TraversingEventListener() {}; TraversingEventListener() {};
@ -208,6 +212,9 @@ class DynamicPatriciaTrieReadingHelper {
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
TraversingEventListener *const listener);
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);

View File

@ -536,6 +536,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
&listenerForupdatingBigramProbability)) { &listenerForupdatingBigramProbability)) {
return false; return false;
} }
// Mapping from positions in mBuffer to positions in bufferToWrite.
hash_map_compat<int, int> positionMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap);
// TODO: Implement. // TODO: Implement.
return false; return false;
} }

View File

@ -59,6 +59,13 @@ class DynamicPatriciaTrieWritingHelper {
// DynamicPatriciaTrieGcEventListeners. // DynamicPatriciaTrieGcEventListeners.
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
// CAVEAT: This method must be called only from this class or inner classes of
// DynamicPatriciaTrieGcEventListeners.
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
@ -82,11 +89,6 @@ class DynamicPatriciaTrieWritingHelper {
const int parentPos, const int *const codePoints, const int codePointCount, const int parentPos, const int *const codePoints, const int codePointCount,
const int probability, int *const writingPos); const int probability, int *const writingPos);
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos);
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);