From 0355bf2d4c5c6a51a5f1d55b38a9ad539764641e Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 20 Sep 2013 21:11:49 +0900 Subject: [PATCH] GC step 3. Place and write valid PtNodes. Bug: 6669677 Change-Id: Id2b7359bafb34acf12dc3bf4c595a03073fda805 --- ...namic_patricia_trie_gc_event_listeners.cpp | 52 +++++++++++- ...dynamic_patricia_trie_gc_event_listeners.h | 50 ++++++++++- .../dynamic_patricia_trie_reading_helper.cpp | 82 ++++++++++++++++++- .../dynamic_patricia_trie_reading_helper.h | 11 ++- .../dynamic_patricia_trie_writing_helper.cpp | 7 ++ .../dynamic_patricia_trie_writing_helper.h | 12 +-- 6 files changed, 199 insertions(+), 15 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 9b826733f..ffa02e3f6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -20,7 +20,8 @@ namespace latinime { bool DynamicPatriciaTrieGcEventListeners ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted - ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // children. bool isUselessPtNode = !node->isTerminal(); @@ -45,4 +46,53 @@ bool DynamicPatriciaTrieGcEventListeners return true; } +// Writes dummy PtNode array size when the head of PtNode array is read. +bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer + ::onDescend(const int ptNodeArrayPos) { + mValidPtNodeCount = 0; + int writingPos = mBufferToWrite->getTailPosition(); + mPositionMap->insert(hash_map_compat::value_type(ptNodeArrayPos, writingPos)); + // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes. + // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count. + mPtNodeArraySizeFieldPos = writingPos; + return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( + mBufferToWrite, 0 /* arraySize */, &writingPos); +} + +// Write PtNode array terminal and actual PtNode array size. +bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer + ::onReadingPtNodeArrayTail() { + int writingPos = mBufferToWrite->getTailPosition(); + // Write PtNode array terminal. + if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( + mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { + return false; + } + // Write actual PtNode array size. + if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( + mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) { + return false; + } + return true; +} + +// Write valid PtNode to buffer and memorize mapping from the old position to the new position. +bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + if (node->isDeleted()) { + // Current PtNode is not written in new buffer because it has been deleted. + mPositionMap->insert(hash_map_compat::value_type(node->getHeadPos(), + NOT_A_DICT_POS)); + return true; + } + int writingPos = mBufferToWrite->getTailPosition(); + mPositionMap->insert(hash_map_compat::value_type(node->getHeadPos(), writingPos)); + mValidPtNodeCount++; + // Writes current PtNode. + return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, + node->getParentPos(), nodeCodePoints, node->getCodePointCount(), + node->getProbability(), &writingPos); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index f08a2c544..728559330 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -24,6 +24,8 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "utils/hash_map_compat.h" namespace latinime { @@ -52,12 +54,15 @@ class DynamicPatriciaTrieGcEventListeners { return true; } - bool onDescend() { + bool onDescend(const int ptNodeArrayPos) { valueStack.push_back(0); return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node); + bool onReadingPtNodeArrayTail() { return true; } + + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); private: DISALLOW_IMPLICIT_CONSTRUCTORS( @@ -79,9 +84,12 @@ class DynamicPatriciaTrieGcEventListeners { bool onAscend() { return true; } - bool onDescend() { return true; } + bool onDescend(const int ptNodeArrayPos) { return true; } - bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) { + bool onReadingPtNodeArrayTail() { return true; } + + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { if (!node->isDeleted()) { int pos = node->getBigramsPos(); if (pos != NOT_A_DICT_POS) { @@ -99,6 +107,40 @@ class DynamicPatriciaTrieGcEventListeners { DynamicBigramListPolicy *const mBigramPolicy; }; + class ListenerForPlacingAndWritingValidPtNodesToBuffer + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { + public: + ListenerForPlacingAndWritingValidPtNodesToBuffer( + DynamicPatriciaTrieWritingHelper *const writingHelper, + BufferWithExtendableBuffer *const bufferToWrite, + hash_map_compat *const positionMap) + : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), + mPositionMap(positionMap), mValidPtNodeCount(0), + mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; + + bool onAscend() { return true; } + + bool onDescend(const int ptNodeArrayPos); + + bool onReadingPtNodeArrayTail(); + + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); + + hash_map_compat *getPositionMap() const { + return mPositionMap; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer); + + DynamicPatriciaTrieWritingHelper *const mWritingHelper; + BufferWithExtendableBuffer *const mBufferToWrite; + hash_map_compat *const mPositionMap; + int mValidPtNodeCount; + int mPtNodeArraySizeFieldPos; + }; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index 10ab79321..754b679d5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -25,18 +25,22 @@ const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LO const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; +// Visits all PtNodes in post-order depth first manner. +// For example, visits c -> b -> y -> x -> a for the following dictionary: +// a _ b _ c +// \ x _ y bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( TraversingEventListener *const listener) { bool alreadyVisitedChildren = false; // Descend from the root to the root PtNode array. - if (!listener->onDescend()) { + if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) { return false; } while (!isEnd()) { if (!alreadyVisitedChildren) { if (mNodeReader.hasChildren()) { // Move to the first child. - if (!listener->onDescend()) { + if (!listener->onDescend(mNodeReader.getChildrenPos())) { return false; } pushReadingStateToStack(); @@ -45,13 +49,16 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa alreadyVisitedChildren = true; } } else { - if (!listener->onVisitingPtNode(&mNodeReader)) { + if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { return false; } readNextSiblingNode(); if (isEnd()) { // All PtNodes in current linked PtNode arrays have been visited. // Return to the parent. + if (!listener->onReadingPtNodeArrayTail()) { + return false; + } if (!listener->onAscend()) { return false; } @@ -70,6 +77,75 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa return !isError(); } +// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order +// that PtNodes are written in the dictionary buffer. +// For example, visits a -> b -> x -> c -> y for the following dictionary: +// a _ b _ c +// \ x _ y +bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + TraversingEventListener *const listener) { + bool alreadyVisitedAllPtNodesInArray = false; + bool alreadyVisitedChildren = false; + // Descend from the root to the root PtNode array. + if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) { + return false; + } + pushReadingStateToStack(); + while (!isEnd()) { + if (alreadyVisitedAllPtNodesInArray) { + if (alreadyVisitedChildren) { + // Move to next sibling PtNode's children. + readNextSiblingNode(); + if (isEnd()) { + // Return to the parent PTNode. + if (!listener->onAscend()) { + return false; + } + popReadingStateFromStack(); + alreadyVisitedChildren = true; + alreadyVisitedAllPtNodesInArray = true; + } else { + alreadyVisitedChildren = false; + } + } else { + if (mNodeReader.hasChildren()) { + // Move to the first child. + if (!listener->onDescend(mNodeReader.getChildrenPos())) { + return false; + } + pushReadingStateToStack(); + readChildNode(); + // Push state to return the head of PtNode array. + pushReadingStateToStack(); + alreadyVisitedAllPtNodesInArray = false; + alreadyVisitedChildren = false; + } else { + alreadyVisitedChildren = true; + } + } + } else { + if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { + return false; + } + readNextSiblingNode(); + if (isEnd()) { + if (!listener->onReadingPtNodeArrayTail()) { + return false; + } + // Return to the head of current PtNode array. + popReadingStateFromStack(); + alreadyVisitedAllPtNodesInArray = true; + } + } + } + popReadingStateFromStack(); + // Ascend from the root PtNode array to the root. + if (!listener->onAscend()) { + return false; + } + return !isError(); +} + // Read node array size and process empty node arrays. Nodes and arrays are counted up in this // method to avoid an infinite loop. void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h index 4974459f9..b033eee05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h @@ -45,10 +45,14 @@ class DynamicPatriciaTrieReadingHelper { virtual bool onAscend() = 0; // Returns whether the event handling was succeeded or not. - virtual bool onDescend() = 0; + virtual bool onDescend(const int ptNodeArrayPos) = 0; // Returns whether the event handling was succeeded or not. - virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0; + virtual bool onReadingPtNodeArrayTail() = 0; + + // Returns whether the event handling was succeeded or not. + virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) = 0; protected: TraversingEventListener() {}; @@ -208,6 +212,9 @@ class DynamicPatriciaTrieReadingHelper { bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); + bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + TraversingEventListener *const listener); + private: DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 0d872f254..3fc9c4ce0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -536,6 +536,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, &listenerForupdatingBigramProbability)) { return false; } + + // Mapping from positions in mBuffer to positions in bufferToWrite. + hash_map_compat positionMap; + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer + listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap); + // TODO: Implement. return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index d164e4331..e82b80ae5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -59,6 +59,13 @@ class DynamicPatriciaTrieWritingHelper { // DynamicPatriciaTrieGcEventListeners. bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); + // CAVEAT: This method must be called only from this class or inner classes of + // DynamicPatriciaTrieGcEventListeners. + bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, + const int *const codePoints, const int codePointCount, const int probability, + int *const writingPos); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); @@ -82,11 +89,6 @@ class DynamicPatriciaTrieWritingHelper { const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); - bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, - const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos); - bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);