Step 1 to implement GC. Finding garbage PtNodes.

Bug: 6669677
Change-Id: I3551fe2f16a09d2bf7761f4e1d73ebd4a03380e7
This commit is contained in:
Keisuke Kuroyanagi 2013-09-20 15:37:14 +09:00
parent 80f934af54
commit 2a64726a16
7 changed files with 347 additions and 67 deletions

View file

@ -73,6 +73,7 @@ LATIN_IME_CORE_SRC_FILES := \
header/header_read_write_utils.cpp \ header/header_read_write_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \
dictionary_structure_with_buffer_policy_factory.cpp \ dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_gc_event_listeners.cpp \
dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_helper.cpp \

View file

@ -0,0 +1,48 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !node->isTerminal();
if (mChildrenValue > 0) {
isUselessPtNode = false;
} else if (node->isTerminal()) {
// Remove children as all children are useless.
int writingPos = node->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
return false;
}
}
if (isUselessPtNode) {
// Current PtNode is no longer needed. Mark it as deleted.
if (!mWritingHelper->markNodeAsDeleted(node)) {
return false;
}
} else {
valueStack.back() += 1;
}
return true;
}
} // namespace latinime

View file

@ -0,0 +1,75 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
#include <vector>
#include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
namespace latinime {
class DynamicPatriciaTrieGcEventListeners {
public:
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer)
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
mChildrenValue(0) {}
~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};
bool onAscend() {
if (valueStack.empty()) {
return false;
}
mChildrenValue = valueStack.back();
valueStack.pop_back();
return true;
}
bool onDescend() {
valueStack.push_back(0);
return true;
}
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
std::vector<int> valueStack;
int mChildrenValue;
};
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */

View file

@ -23,36 +23,85 @@ namespace latinime {
// To avoid infinite loop caused by invalid or malicious forward links. // To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
TraversingEventListener *const listener) {
bool alreadyVisitedChildren = false;
// Descend from the root to the root PtNode array.
if (!listener->onDescend()) {
return false;
}
while (!isEnd()) {
if (!alreadyVisitedChildren) {
if (mNodeReader.hasChildren()) {
// Move to the first child.
if (!listener->onDescend()) {
return false;
}
pushReadingStateToStack();
readChildNode();
} else {
alreadyVisitedChildren = true;
}
} else {
if (!listener->onVisitingPtNode(&mNodeReader)) {
return false;
}
readNextSiblingNode();
if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent.
if (!listener->onAscend()) {
return false;
}
popReadingStateFromStack();
alreadyVisitedChildren = true;
} else {
// Process sibling PtNode.
alreadyVisitedChildren = false;
}
}
}
// Ascend from the root PtNode array to the root.
if (!listener->onAscend()) {
return false;
}
return !isError();
}
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this // Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop. // method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextNodeArray() { void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize();
} }
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf, mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
&mPos); dictBuf, &mReadingState.mPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize();
} }
// Count up nodes and node arrays to avoid infinite loop. // Count up nodes and node arrays to avoid infinite loop.
mTotalNodeCount += mNodeCount; mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
mNodeArrayCount++; mReadingState.mNodeArrayCount++;
if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP if (mReadingState.mNodeCount < 0
|| mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { || mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|| mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary. // Invalid dictionary.
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
ASSERT(false); ASSERT(false);
mIsError = true; mIsError = true;
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
return; return;
} }
if (mNodeCount == 0) { if (mReadingState.mNodeCount == 0) {
// Empty node array. Try following forward link. // Empty node array. Try following forward link.
followForwardLink(); followForwardLink();
} }
@ -60,24 +109,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
// Follow the forward link and read the next node array if exists. // Follow the forward link and read the next node array if exists.
void DynamicPatriciaTrieReadingHelper::followForwardLink() { void DynamicPatriciaTrieReadingHelper::followForwardLink() {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos -= mBuffer->getOriginalBufferSize(); mReadingState.mPos -= mBuffer->getOriginalBufferSize();
} }
const int forwardLinkPosition = const int forwardLinkPosition =
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos); DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mPos += mBuffer->getOriginalBufferSize(); mReadingState.mPos += mBuffer->getOriginalBufferSize();
} }
mPosOfLastForwardLinkField = mPos; mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) { if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
// Follow the forward link. // Follow the forward link.
mPos += forwardLinkPosition; mReadingState.mPos += forwardLinkPosition;
nextNodeArray(); nextNodeArray();
} else { } else {
// All node arrays have been read. // All node arrays have been read.
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
} }

View file

@ -17,6 +17,9 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
#include <cstddef>
#include <vector>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
@ -34,12 +37,31 @@ class DictionaryShortcutsStructurePolicy;
*/ */
class DynamicPatriciaTrieReadingHelper { class DynamicPatriciaTrieReadingHelper {
public: public:
class TraversingEventListener {
public:
virtual ~TraversingEventListener() {};
// Returns whether the event handling was succeeded or not.
virtual bool onAscend() = 0;
// Returns whether the event handling was succeeded or not.
virtual bool onDescend() = 0;
// Returns whether the event handling was succeeded or not.
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0;
protected:
TraversingEventListener() {};
private:
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
};
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0), : mIsError(false), mReadingState(), mBuffer(buffer),
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
~DynamicPatriciaTrieReadingHelper() {} ~DynamicPatriciaTrieReadingHelper() {}
@ -48,21 +70,21 @@ class DynamicPatriciaTrieReadingHelper {
} }
AK_FORCE_INLINE bool isEnd() const { AK_FORCE_INLINE bool isEnd() const {
return mPos == NOT_A_DICT_POS; return mReadingState.mPos == NOT_A_DICT_POS;
} }
// Initialize reading state with the head position of a node array. // Initialize reading state with the head position of a node array.
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) { AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
if (nodeArrayPos == NOT_A_DICT_POS) { if (nodeArrayPos == NOT_A_DICT_POS) {
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} else { } else {
mIsError = false; mIsError = false;
mPos = nodeArrayPos; mReadingState.mPos = nodeArrayPos;
mNodeCount = 0; mReadingState.mPrevTotalCodePointCount = 0;
mPrevTotalCodePointCount = 0; mReadingState.mTotalNodeCount = 0;
mTotalNodeCount = 0; mReadingState.mNodeArrayCount = 0;
mNodeArrayCount = 0; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingStateStack.clear();
nextNodeArray(); nextNodeArray();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchNodeInfo();
@ -73,15 +95,17 @@ class DynamicPatriciaTrieReadingHelper {
// Initialize reading state with the head position of a node. // Initialize reading state with the head position of a node.
AK_FORCE_INLINE void initWithNodePos(const int nodePos) { AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
if (nodePos == NOT_A_DICT_POS) { if (nodePos == NOT_A_DICT_POS) {
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} else { } else {
mIsError = false; mIsError = false;
mPos = nodePos; mReadingState.mPos = nodePos;
mNodeCount = 1; mReadingState.mNodeCount = 1;
mPrevTotalCodePointCount = 0; mReadingState.mPrevTotalCodePointCount = 0;
mTotalNodeCount = 1; mReadingState.mTotalNodeCount = 1;
mNodeArrayCount = 1; mReadingState.mNodeArrayCount = 1;
mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear();
fetchNodeInfo(); fetchNodeInfo();
} }
} }
@ -100,12 +124,12 @@ class DynamicPatriciaTrieReadingHelper {
// Return code point count exclude the last read node's code points. // Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const { AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
return mPrevTotalCodePointCount; return mReadingState.mPrevTotalCodePointCount;
} }
// Return code point count include the last read node's code points. // Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount() const { AK_FORCE_INLINE int getTotalCodePointCount() const {
return mPrevTotalCodePointCount + mNodeReader.getCodePointCount(); return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
} }
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
@ -121,9 +145,9 @@ class DynamicPatriciaTrieReadingHelper {
} }
AK_FORCE_INLINE void readNextSiblingNode() { AK_FORCE_INLINE void readNextSiblingNode() {
mNodeCount -= 1; mReadingState.mNodeCount -= 1;
mPos = mNodeReader.getSiblingNodePos(); mReadingState.mPos = mNodeReader.getSiblingNodePos();
if (mNodeCount <= 0) { if (mReadingState.mNodeCount <= 0) {
// All nodes in the current node array have been read. // All nodes in the current node array have been read.
followForwardLink(); followForwardLink();
if (!isEnd()) { if (!isEnd()) {
@ -137,47 +161,64 @@ class DynamicPatriciaTrieReadingHelper {
// Read the first child node of the current node. // Read the first child node of the current node.
AK_FORCE_INLINE void readChildNode() { AK_FORCE_INLINE void readChildNode() {
if (mNodeReader.hasChildren()) { if (mNodeReader.hasChildren()) {
mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
mTotalNodeCount = 0; mReadingState.mTotalNodeCount = 0;
mNodeArrayCount = 0; mReadingState.mNodeArrayCount = 0;
mPos = mNodeReader.getChildrenPos(); mReadingState.mPos = mNodeReader.getChildrenPos();
mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array. // Read children node array.
nextNodeArray(); nextNodeArray();
if (!isEnd()) { if (!isEnd()) {
fetchNodeInfo(); fetchNodeInfo();
} }
} else { } else {
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
} }
// Read the parent node of the current node. // Read the parent node of the current node.
AK_FORCE_INLINE void readParentNode() { AK_FORCE_INLINE void readParentNode() {
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
mPrevTotalCodePointCount += mNodeReader.getCodePointCount(); mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
mTotalNodeCount = 1; mReadingState.mTotalNodeCount = 1;
mNodeArrayCount = 1; mReadingState.mNodeArrayCount = 1;
mNodeCount = 1; mReadingState.mNodeCount = 1;
mPos = mNodeReader.getParentPos(); mReadingState.mPos = mNodeReader.getParentPos();
mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
fetchNodeInfo(); fetchNodeInfo();
} else { } else {
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
} }
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const { AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
return mPosOfLastForwardLinkField; return mReadingState.mPosOfLastForwardLinkField;
} }
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
return mReadingState.mPosOfLastPtNodeArrayHead;
}
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
if (!isEnd()) {
fetchNodeInfo();
}
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; class ReadingState {
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; public:
// Note that copy constructor and assignment operator are used for this class to use
// std::vector.
ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
bool mIsError;
int mPos; int mPos;
// Node count of a node array. // Node count of a node array.
int mNodeCount; int mNodeCount;
@ -185,21 +226,52 @@ class DynamicPatriciaTrieReadingHelper {
int mTotalNodeCount; int mTotalNodeCount;
int mNodeArrayCount; int mNodeArrayCount;
int mPosOfLastForwardLinkField; int mPosOfLastForwardLinkField;
int mPosOfLastPtNodeArrayHead;
};
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
bool mIsError;
ReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
DynamicPatriciaTrieNodeReader mNodeReader; DynamicPatriciaTrieNodeReader mNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH]; int mMergedNodeCodePoints[MAX_WORD_LENGTH];
std::vector<ReadingState> mReadingStateStack;
void nextNodeArray(); void nextNodeArray();
void followForwardLink(); void followForwardLink();
AK_FORCE_INLINE void fetchNodeInfo() { AK_FORCE_INLINE void fetchNodeInfo() {
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH, mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
mMergedNodeCodePoints); MAX_WORD_LENGTH, mMergedNodeCodePoints);
if (mNodeReader.getCodePointCount() <= 0) { if (mNodeReader.getCodePointCount() <= 0) {
// Empty node is not allowed. // Empty node is not allowed.
mIsError = true; mIsError = true;
mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
}
}
AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %d", MAX_READING_STATE_STACK_SIZE);
ASSERT(false);
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
} else {
mReadingStateStack.push_back(mReadingState);
}
}
AK_FORCE_INLINE void popReadingStateFromStack() {
if (mReadingStateStack.empty()) {
mReadingState.mPos = NOT_A_DICT_POS;
} else {
mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back();
fetchNodeInfo();
} }
} }
}; };

View file

@ -20,6 +20,7 @@
#include <cstring> #include <cstring>
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
@ -159,6 +160,26 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
flushAllToFile(fileName, &headerBuffer, &newDictBuffer); flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
} }
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
int pos = nodeToUpdate->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */);
int writingPos = nodeToUpdate->getHeadPos();
// Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
const int bigramLinkedNodePos) { const int bigramLinkedNodePos) {
@ -497,6 +518,16 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
BufferWithExtendableBuffer *const bufferToWrite) { BufferWithExtendableBuffer *const bufferToWrite) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
this, mBuffer);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
return false;
}
// TODO: Implement. // TODO: Implement.
return false; return false;
} }

View file

@ -55,6 +55,10 @@ class DynamicPatriciaTrieWritingHelper {
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
const HeaderPolicy *const headerPolicy); const HeaderPolicy *const headerPolicy);
// CAVEAT: This method must be called only from inner classes of
// DynamicPatriciaTrieGcEventListeners.
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);