am 0355bf2d
: GC step 3. Place and write valid PtNodes.
* commit '0355bf2d4c5c6a51a5f1d55b38a9ad539764641e': GC step 3. Place and write valid PtNodes.
This commit is contained in:
commit
386a0dc07b
6 changed files with 199 additions and 15 deletions
|
@ -20,7 +20,8 @@ namespace latinime {
|
|||
|
||||
bool DynamicPatriciaTrieGcEventListeners
|
||||
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||
// children.
|
||||
bool isUselessPtNode = !node->isTerminal();
|
||||
|
@ -45,4 +46,53 @@ bool DynamicPatriciaTrieGcEventListeners
|
|||
return true;
|
||||
}
|
||||
|
||||
// Writes dummy PtNode array size when the head of PtNode array is read.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
::onDescend(const int ptNodeArrayPos) {
|
||||
mValidPtNodeCount = 0;
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos));
|
||||
// Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
|
||||
// This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
|
||||
mPtNodeArraySizeFieldPos = writingPos;
|
||||
return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
|
||||
mBufferToWrite, 0 /* arraySize */, &writingPos);
|
||||
}
|
||||
|
||||
// Write PtNode array terminal and actual PtNode array size.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
::onReadingPtNodeArrayTail() {
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
// Write PtNode array terminal.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
|
||||
mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write actual PtNode array size.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
|
||||
mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
if (node->isDeleted()) {
|
||||
// Current PtNode is not written in new buffer because it has been deleted.
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(),
|
||||
NOT_A_DICT_POS));
|
||||
return true;
|
||||
}
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos));
|
||||
mValidPtNodeCount++;
|
||||
// Writes current PtNode.
|
||||
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
|
||||
node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
|
||||
node->getProbability(), &writingPos);
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -52,12 +54,15 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool onDescend() {
|
||||
bool onDescend(const int ptNodeArrayPos) {
|
||||
valueStack.push_back(0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node);
|
||||
bool onReadingPtNodeArrayTail() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(
|
||||
|
@ -79,9 +84,12 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
|
||||
bool onAscend() { return true; }
|
||||
|
||||
bool onDescend() { return true; }
|
||||
bool onDescend(const int ptNodeArrayPos) { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
|
||||
bool onReadingPtNodeArrayTail() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
if (!node->isDeleted()) {
|
||||
int pos = node->getBigramsPos();
|
||||
if (pos != NOT_A_DICT_POS) {
|
||||
|
@ -99,6 +107,40 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
};
|
||||
|
||||
class ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
ListenerForPlacingAndWritingValidPtNodesToBuffer(
|
||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||
BufferWithExtendableBuffer *const bufferToWrite,
|
||||
hash_map_compat<int, int> *const positionMap)
|
||||
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
|
||||
mPositionMap(positionMap), mValidPtNodeCount(0),
|
||||
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
||||
bool onDescend(const int ptNodeArrayPos);
|
||||
|
||||
bool onReadingPtNodeArrayTail();
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints);
|
||||
|
||||
hash_map_compat<int, int> *getPositionMap() const {
|
||||
return mPositionMap;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer);
|
||||
|
||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||
BufferWithExtendableBuffer *const mBufferToWrite;
|
||||
hash_map_compat<int, int> *const mPositionMap;
|
||||
int mValidPtNodeCount;
|
||||
int mPtNodeArraySizeFieldPos;
|
||||
};
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
||||
};
|
||||
|
|
|
@ -25,18 +25,22 @@ const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LO
|
|||
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
||||
|
||||
// Visits all PtNodes in post-order depth first manner.
|
||||
// For example, visits c -> b -> y -> x -> a for the following dictionary:
|
||||
// a _ b _ c
|
||||
// \ x _ y
|
||||
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
TraversingEventListener *const listener) {
|
||||
bool alreadyVisitedChildren = false;
|
||||
// Descend from the root to the root PtNode array.
|
||||
if (!listener->onDescend()) {
|
||||
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
||||
return false;
|
||||
}
|
||||
while (!isEnd()) {
|
||||
if (!alreadyVisitedChildren) {
|
||||
if (mNodeReader.hasChildren()) {
|
||||
// Move to the first child.
|
||||
if (!listener->onDescend()) {
|
||||
if (!listener->onDescend(mNodeReader.getChildrenPos())) {
|
||||
return false;
|
||||
}
|
||||
pushReadingStateToStack();
|
||||
|
@ -45,13 +49,16 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
|||
alreadyVisitedChildren = true;
|
||||
}
|
||||
} else {
|
||||
if (!listener->onVisitingPtNode(&mNodeReader)) {
|
||||
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
|
||||
return false;
|
||||
}
|
||||
readNextSiblingNode();
|
||||
if (isEnd()) {
|
||||
// All PtNodes in current linked PtNode arrays have been visited.
|
||||
// Return to the parent.
|
||||
if (!listener->onReadingPtNodeArrayTail()) {
|
||||
return false;
|
||||
}
|
||||
if (!listener->onAscend()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -70,6 +77,75 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
|||
return !isError();
|
||||
}
|
||||
|
||||
// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
|
||||
// that PtNodes are written in the dictionary buffer.
|
||||
// For example, visits a -> b -> x -> c -> y for the following dictionary:
|
||||
// a _ b _ c
|
||||
// \ x _ y
|
||||
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
TraversingEventListener *const listener) {
|
||||
bool alreadyVisitedAllPtNodesInArray = false;
|
||||
bool alreadyVisitedChildren = false;
|
||||
// Descend from the root to the root PtNode array.
|
||||
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
||||
return false;
|
||||
}
|
||||
pushReadingStateToStack();
|
||||
while (!isEnd()) {
|
||||
if (alreadyVisitedAllPtNodesInArray) {
|
||||
if (alreadyVisitedChildren) {
|
||||
// Move to next sibling PtNode's children.
|
||||
readNextSiblingNode();
|
||||
if (isEnd()) {
|
||||
// Return to the parent PTNode.
|
||||
if (!listener->onAscend()) {
|
||||
return false;
|
||||
}
|
||||
popReadingStateFromStack();
|
||||
alreadyVisitedChildren = true;
|
||||
alreadyVisitedAllPtNodesInArray = true;
|
||||
} else {
|
||||
alreadyVisitedChildren = false;
|
||||
}
|
||||
} else {
|
||||
if (mNodeReader.hasChildren()) {
|
||||
// Move to the first child.
|
||||
if (!listener->onDescend(mNodeReader.getChildrenPos())) {
|
||||
return false;
|
||||
}
|
||||
pushReadingStateToStack();
|
||||
readChildNode();
|
||||
// Push state to return the head of PtNode array.
|
||||
pushReadingStateToStack();
|
||||
alreadyVisitedAllPtNodesInArray = false;
|
||||
alreadyVisitedChildren = false;
|
||||
} else {
|
||||
alreadyVisitedChildren = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
|
||||
return false;
|
||||
}
|
||||
readNextSiblingNode();
|
||||
if (isEnd()) {
|
||||
if (!listener->onReadingPtNodeArrayTail()) {
|
||||
return false;
|
||||
}
|
||||
// Return to the head of current PtNode array.
|
||||
popReadingStateFromStack();
|
||||
alreadyVisitedAllPtNodesInArray = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
popReadingStateFromStack();
|
||||
// Ascend from the root PtNode array to the root.
|
||||
if (!listener->onAscend()) {
|
||||
return false;
|
||||
}
|
||||
return !isError();
|
||||
}
|
||||
|
||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||
// method to avoid an infinite loop.
|
||||
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
|
||||
|
|
|
@ -45,10 +45,14 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
virtual bool onAscend() = 0;
|
||||
|
||||
// Returns whether the event handling was succeeded or not.
|
||||
virtual bool onDescend() = 0;
|
||||
virtual bool onDescend(const int ptNodeArrayPos) = 0;
|
||||
|
||||
// Returns whether the event handling was succeeded or not.
|
||||
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0;
|
||||
virtual bool onReadingPtNodeArrayTail() = 0;
|
||||
|
||||
// Returns whether the event handling was succeeded or not.
|
||||
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) = 0;
|
||||
|
||||
protected:
|
||||
TraversingEventListener() {};
|
||||
|
@ -208,6 +212,9 @@ class DynamicPatriciaTrieReadingHelper {
|
|||
|
||||
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
|
||||
|
||||
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
TraversingEventListener *const listener);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||
|
||||
|
|
|
@ -536,6 +536,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
&listenerForupdatingBigramProbability)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||
hash_map_compat<int, int> positionMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap);
|
||||
|
||||
// TODO: Implement.
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,13 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
// DynamicPatriciaTrieGcEventListeners.
|
||||
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
|
||||
|
||||
// CAVEAT: This method must be called only from this class or inner classes of
|
||||
// DynamicPatriciaTrieGcEventListeners.
|
||||
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
|
||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
|
||||
const int *const codePoints, const int codePointCount, const int probability,
|
||||
int *const writingPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||
|
||||
|
@ -82,11 +89,6 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
const int parentPos, const int *const codePoints, const int codePointCount,
|
||||
const int probability, int *const writingPos);
|
||||
|
||||
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
|
||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
|
||||
const int *const codePoints, const int codePointCount, const int probability,
|
||||
int *const writingPos);
|
||||
|
||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||
|
||||
|
|
Loading…
Reference in a new issue