Step 1 to implement GC. Finding garbage PtNodes.
Bug: 6669677 Change-Id: I3551fe2f16a09d2bf7761f4e1d73ebd4a03380e7
This commit is contained in:
parent
80f934af54
commit
2a64726a16
7 changed files with 347 additions and 67 deletions
|
@ -73,6 +73,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
header/header_read_write_utils.cpp \
|
header/header_read_write_utils.cpp \
|
||||||
shortcut/shortcut_list_reading_utils.cpp \
|
shortcut/shortcut_list_reading_utils.cpp \
|
||||||
dictionary_structure_with_buffer_policy_factory.cpp \
|
dictionary_structure_with_buffer_policy_factory.cpp \
|
||||||
|
dynamic_patricia_trie_gc_event_listeners.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieGcEventListeners
|
||||||
|
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||||
|
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) {
|
||||||
|
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||||
|
// children.
|
||||||
|
bool isUselessPtNode = !node->isTerminal();
|
||||||
|
if (mChildrenValue > 0) {
|
||||||
|
isUselessPtNode = false;
|
||||||
|
} else if (node->isTerminal()) {
|
||||||
|
// Remove children as all children are useless.
|
||||||
|
int writingPos = node->getChildrenPosFieldPos();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
|
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isUselessPtNode) {
|
||||||
|
// Current PtNode is no longer needed. Mark it as deleted.
|
||||||
|
if (!mWritingHelper->markNodeAsDeleted(node)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
valueStack.back() += 1;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
|
||||||
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
public:
|
||||||
|
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
|
||||||
|
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
|
||||||
|
// TODO: Concatenate non-terminal PtNodes.
|
||||||
|
class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||||
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
|
public:
|
||||||
|
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||||
|
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||||
|
BufferWithExtendableBuffer *const buffer)
|
||||||
|
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
|
||||||
|
mChildrenValue(0) {}
|
||||||
|
|
||||||
|
~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};
|
||||||
|
|
||||||
|
bool onAscend() {
|
||||||
|
if (valueStack.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mChildrenValue = valueStack.back();
|
||||||
|
valueStack.pop_back();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool onDescend() {
|
||||||
|
valueStack.push_back(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(
|
||||||
|
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);
|
||||||
|
|
||||||
|
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||||
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
std::vector<int> valueStack;
|
||||||
|
int mChildrenValue;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_GC_EVENT_LISTENERS_H */
|
|
@ -23,36 +23,85 @@ namespace latinime {
|
||||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||||
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
|
const size_t DynamicPatriciaTrieReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
||||||
|
|
||||||
|
// Walks every PtNode reachable from the current reading position in post-order (children before
// their parent) and reports the walk to the listener:
//   - onDescend() once at the start and each time the walk moves into a child PtNode array,
//   - onVisitingPtNode() for each PtNode after its children have been handled,
//   - onAscend() each time the walk returns to a parent and once at the very end.
// Every onDescend() is paired with exactly one onAscend().
//
// Returns false as soon as a listener callback fails or the reading state stack overflows;
// otherwise returns whether the helper finished without a reading error.
bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
        TraversingEventListener *const listener) {
    bool alreadyVisitedChildren = false;
    // Descend from the root to the root PtNode array.
    if (!listener->onDescend()) {
        return false;
    }
    while (!isEnd()) {
        if (!alreadyVisitedChildren) {
            if (!mNodeReader.hasChildren()) {
                // Nothing below this PtNode; visit it on the next iteration.
                alreadyVisitedChildren = true;
                continue;
            }
            // Move to the first child.
            if (!listener->onDescend()) {
                return false;
            }
            pushReadingStateToStack();
            if (isError()) {
                // The reading state could not be saved. Going on would make it impossible to
                // return to this PtNode, and readChildNode() would overwrite the end-of-reading
                // position set by the failed push.
                return false;
            }
            // NOTE(review): if the child array turns out to be empty, isEnd() becomes true here
            // and the loop exits without visiting the parent - confirm whether empty child
            // arrays can occur.
            readChildNode();
            continue;
        }
        if (!listener->onVisitingPtNode(&mNodeReader)) {
            return false;
        }
        readNextSiblingNode();
        if (!isEnd()) {
            // Process sibling PtNode.
            alreadyVisitedChildren = false;
            continue;
        }
        // All PtNodes in current linked PtNode arrays have been visited.
        if (mReadingStateStack.empty()) {
            // This was the root PtNode array. The matching onAscend() is issued after the
            // loop; issuing it here as well would ascend once more than was descended.
            break;
        }
        // Return to the parent.
        if (!listener->onAscend()) {
            return false;
        }
        popReadingStateFromStack();
        alreadyVisitedChildren = true;
    }
    // Ascend from the root PtNode array to the root.
    if (!listener->onAscend()) {
        return false;
    }
    return !isError();
}
|
||||||
|
|
||||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||||
// method to avoid an infinite loop.
|
// method to avoid an infinite loop.
|
||||||
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
|
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos -= mBuffer->getOriginalBufferSize();
|
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
mReadingState.mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||||
&mPos);
|
dictBuf, &mReadingState.mPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos += mBuffer->getOriginalBufferSize();
|
mReadingState.mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
// Count up nodes and node arrays to avoid infinite loop.
|
// Count up nodes and node arrays to avoid infinite loop.
|
||||||
mTotalNodeCount += mNodeCount;
|
mReadingState.mTotalNodeCount += mReadingState.mNodeCount;
|
||||||
mNodeArrayCount++;
|
mReadingState.mNodeArrayCount++;
|
||||||
if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
if (mReadingState.mNodeCount < 0
|
||||||
|| mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
|| mReadingState.mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||||
|
|| mReadingState.mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||||
// Invalid dictionary.
|
// Invalid dictionary.
|
||||||
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
||||||
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
||||||
mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
mReadingState.mNodeCount, mReadingState.mTotalNodeCount,
|
||||||
mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, mReadingState.mNodeArrayCount,
|
||||||
|
MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
mIsError = true;
|
mIsError = true;
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (mNodeCount == 0) {
|
if (mReadingState.mNodeCount == 0) {
|
||||||
// Empty node array. Try following forward link.
|
// Empty node array. Try following forward link.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
}
|
}
|
||||||
|
@ -60,24 +109,24 @@ void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||||
|
|
||||||
// Follow the forward link and read the next node array if exists.
|
// Follow the forward link and read the next node array if exists.
|
||||||
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos -= mBuffer->getOriginalBufferSize();
|
mReadingState.mPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
const int forwardLinkPosition =
|
const int forwardLinkPosition =
|
||||||
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
|
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mReadingState.mPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mPos += mBuffer->getOriginalBufferSize();
|
mReadingState.mPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mPosOfLastForwardLinkField = mPos;
|
mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
|
||||||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||||
// Follow the forward link.
|
// Follow the forward link.
|
||||||
mPos += forwardLinkPosition;
|
mReadingState.mPos += forwardLinkPosition;
|
||||||
nextNodeArray();
|
nextNodeArray();
|
||||||
} else {
|
} else {
|
||||||
// All node arrays have been read.
|
// All node arrays have been read.
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,9 @@
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
@ -34,12 +37,31 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
*/
|
*/
|
||||||
class DynamicPatriciaTrieReadingHelper {
|
class DynamicPatriciaTrieReadingHelper {
|
||||||
public:
|
public:
|
||||||
|
class TraversingEventListener {
|
||||||
|
public:
|
||||||
|
virtual ~TraversingEventListener() {};
|
||||||
|
|
||||||
|
// Returns whether the event handling was succeeded or not.
|
||||||
|
virtual bool onAscend() = 0;
|
||||||
|
|
||||||
|
// Returns whether the event handling was succeeded or not.
|
||||||
|
virtual bool onDescend() = 0;
|
||||||
|
|
||||||
|
// Returns whether the event handling was succeeded or not.
|
||||||
|
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node) = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
TraversingEventListener() {};
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
|
||||||
|
};
|
||||||
|
|
||||||
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
: mIsError(false), mReadingState(), mBuffer(buffer),
|
||||||
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
|
||||||
mBuffer(buffer), mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy) {}
|
|
||||||
|
|
||||||
~DynamicPatriciaTrieReadingHelper() {}
|
~DynamicPatriciaTrieReadingHelper() {}
|
||||||
|
|
||||||
|
@ -48,21 +70,21 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isEnd() const {
|
AK_FORCE_INLINE bool isEnd() const {
|
||||||
return mPos == NOT_A_DICT_POS;
|
return mReadingState.mPos == NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize reading state with the head position of a node array.
|
// Initialize reading state with the head position of a node array.
|
||||||
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
||||||
if (nodeArrayPos == NOT_A_DICT_POS) {
|
if (nodeArrayPos == NOT_A_DICT_POS) {
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mPos = nodeArrayPos;
|
mReadingState.mPos = nodeArrayPos;
|
||||||
mNodeCount = 0;
|
mReadingState.mPrevTotalCodePointCount = 0;
|
||||||
mPrevTotalCodePointCount = 0;
|
mReadingState.mTotalNodeCount = 0;
|
||||||
mTotalNodeCount = 0;
|
mReadingState.mNodeArrayCount = 0;
|
||||||
mNodeArrayCount = 0;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingStateStack.clear();
|
||||||
nextNodeArray();
|
nextNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
|
@ -73,15 +95,17 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
// Initialize reading state with the head position of a node.
|
// Initialize reading state with the head position of a node.
|
||||||
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
||||||
if (nodePos == NOT_A_DICT_POS) {
|
if (nodePos == NOT_A_DICT_POS) {
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
} else {
|
} else {
|
||||||
mIsError = false;
|
mIsError = false;
|
||||||
mPos = nodePos;
|
mReadingState.mPos = nodePos;
|
||||||
mNodeCount = 1;
|
mReadingState.mNodeCount = 1;
|
||||||
mPrevTotalCodePointCount = 0;
|
mReadingState.mPrevTotalCodePointCount = 0;
|
||||||
mTotalNodeCount = 1;
|
mReadingState.mTotalNodeCount = 1;
|
||||||
mNodeArrayCount = 1;
|
mReadingState.mNodeArrayCount = 1;
|
||||||
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
|
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
|
mReadingStateStack.clear();
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -100,12 +124,12 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
|
|
||||||
// Return code point count exclude the last read node's code points.
|
// Return code point count exclude the last read node's code points.
|
||||||
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
||||||
return mPrevTotalCodePointCount;
|
return mReadingState.mPrevTotalCodePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count include the last read node's code points.
|
// Return code point count include the last read node's code points.
|
||||||
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
||||||
return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
return mReadingState.mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
||||||
|
@ -121,9 +145,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void readNextSiblingNode() {
|
AK_FORCE_INLINE void readNextSiblingNode() {
|
||||||
mNodeCount -= 1;
|
mReadingState.mNodeCount -= 1;
|
||||||
mPos = mNodeReader.getSiblingNodePos();
|
mReadingState.mPos = mNodeReader.getSiblingNodePos();
|
||||||
if (mNodeCount <= 0) {
|
if (mReadingState.mNodeCount <= 0) {
|
||||||
// All nodes in the current node array have been read.
|
// All nodes in the current node array have been read.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
|
@ -137,47 +161,64 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
// Read the first child node of the current node.
|
// Read the first child node of the current node.
|
||||||
AK_FORCE_INLINE void readChildNode() {
|
AK_FORCE_INLINE void readChildNode() {
|
||||||
if (mNodeReader.hasChildren()) {
|
if (mNodeReader.hasChildren()) {
|
||||||
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||||
mTotalNodeCount = 0;
|
mReadingState.mTotalNodeCount = 0;
|
||||||
mNodeArrayCount = 0;
|
mReadingState.mNodeArrayCount = 0;
|
||||||
mPos = mNodeReader.getChildrenPos();
|
mReadingState.mPos = mNodeReader.getChildrenPos();
|
||||||
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
// Read children node array.
|
// Read children node array.
|
||||||
nextNodeArray();
|
nextNodeArray();
|
||||||
if (!isEnd()) {
|
if (!isEnd()) {
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the parent node of the current node.
|
// Read the parent node of the current node.
|
||||||
AK_FORCE_INLINE void readParentNode() {
|
AK_FORCE_INLINE void readParentNode() {
|
||||||
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||||
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
mReadingState.mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||||
mTotalNodeCount = 1;
|
mReadingState.mTotalNodeCount = 1;
|
||||||
mNodeArrayCount = 1;
|
mReadingState.mNodeArrayCount = 1;
|
||||||
mNodeCount = 1;
|
mReadingState.mNodeCount = 1;
|
||||||
mPos = mNodeReader.getParentPos();
|
mReadingState.mPos = mNodeReader.getParentPos();
|
||||||
mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
|
mReadingState.mPosOfLastPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
fetchNodeInfo();
|
fetchNodeInfo();
|
||||||
} else {
|
} else {
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
|
AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
|
||||||
return mPosOfLastForwardLinkField;
|
return mReadingState.mPosOfLastForwardLinkField;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the head position recorded for the most recently read PtNode array, or
// NOT_A_DICT_POS when the reading state was set up from a single PtNode position.
AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
    const int lastPtNodeArrayHeadPos = mReadingState.mPosOfLastPtNodeArrayHead;
    return lastPtNodeArrayHeadPos;
}
|
||||||
|
|
||||||
|
// Re-reads the PtNode at the current reading position. Does nothing once reading has ended.
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
    if (isEnd()) {
        return;
    }
    fetchNodeInfo();
}
|
||||||
|
|
||||||
|
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||||
|
|
||||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
class ReadingState {
|
||||||
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
public:
|
||||||
|
// Note that copy constructor and assignment operator are used for this class to use
|
||||||
|
// std::vector.
|
||||||
|
ReadingState() : mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||||
|
mTotalNodeCount(0), mNodeArrayCount(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
|
||||||
|
mPosOfLastPtNodeArrayHead(NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
bool mIsError;
|
|
||||||
int mPos;
|
int mPos;
|
||||||
// Node count of a node array.
|
// Node count of a node array.
|
||||||
int mNodeCount;
|
int mNodeCount;
|
||||||
|
@ -185,21 +226,52 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
int mTotalNodeCount;
|
int mTotalNodeCount;
|
||||||
int mNodeArrayCount;
|
int mNodeArrayCount;
|
||||||
int mPosOfLastForwardLinkField;
|
int mPosOfLastForwardLinkField;
|
||||||
|
int mPosOfLastPtNodeArrayHead;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
|
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
|
static const size_t MAX_READING_STATE_STACK_SIZE;
|
||||||
|
|
||||||
|
bool mIsError;
|
||||||
|
ReadingState mReadingState;
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
std::vector<ReadingState> mReadingStateStack;
|
||||||
|
|
||||||
void nextNodeArray();
|
void nextNodeArray();
|
||||||
|
|
||||||
void followForwardLink();
|
void followForwardLink();
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchNodeInfo() {
|
AK_FORCE_INLINE void fetchNodeInfo() {
|
||||||
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
|
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mReadingState.mPos,
|
||||||
mMergedNodeCodePoints);
|
MAX_WORD_LENGTH, mMergedNodeCodePoints);
|
||||||
if (mNodeReader.getCodePointCount() <= 0) {
|
if (mNodeReader.getCodePointCount() <= 0) {
|
||||||
// Empty node is not allowed.
|
// Empty node is not allowed.
|
||||||
mIsError = true;
|
mIsError = true;
|
||||||
mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Saves a copy of the current reading state so that it can be restored by
// popReadingStateFromStack(). When the stack has grown past MAX_READING_STATE_STACK_SIZE the
// state is not saved; instead the error flag is set and reading is ended by setting the
// position to NOT_A_DICT_POS.
AK_FORCE_INLINE void pushReadingStateToStack() {
    if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
        // MAX_READING_STATE_STACK_SIZE is a size_t, so it must be printed with %zu; passing it
        // to %d is a format/argument type mismatch.
        AKLOGI("Reading state stack overflow. Max size: %zu", MAX_READING_STATE_STACK_SIZE);
        ASSERT(false);
        mIsError = true;
        mReadingState.mPos = NOT_A_DICT_POS;
        return;
    }
    mReadingStateStack.push_back(mReadingState);
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void popReadingStateFromStack() {
|
||||||
|
if (mReadingStateStack.empty()) {
|
||||||
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
|
} else {
|
||||||
|
mReadingState = mReadingStateStack.back();
|
||||||
|
mReadingStateStack.pop_back();
|
||||||
|
fetchNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
@ -159,6 +160,26 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
|
flushAllToFile(fileName, &headerBuffer, &newDictBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
|
||||||
|
const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
|
||||||
|
int pos = nodeToUpdate->getHeadPos();
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||||
|
true /* isDeleted */);
|
||||||
|
int writingPos = nodeToUpdate->getHeadPos();
|
||||||
|
// Update flags.
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
|
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
|
||||||
const int bigramLinkedNodePos) {
|
const int bigramLinkedNodePos) {
|
||||||
|
@ -497,6 +518,16 @@ bool DynamicPatriciaTrieWritingHelper::writeBufferToFilePointer(FILE *const file
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite) {
|
BufferWithExtendableBuffer *const bufferToWrite) {
|
||||||
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
|
readingHelper.initWithNodeArrayPos(rootPtNodeArrayPos);
|
||||||
|
DynamicPatriciaTrieGcEventListeners
|
||||||
|
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||||
|
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||||
|
this, mBuffer);
|
||||||
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
|
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// TODO: Implement.
|
// TODO: Implement.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,6 +55,10 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
||||||
const HeaderPolicy *const headerPolicy);
|
const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
|
// CAVEAT: This method must be called only from inner classes of
|
||||||
|
// DynamicPatriciaTrieGcEventListeners.
|
||||||
|
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue