Introduce DynamicPatriciaTrieReadingHelper.
It supports iterating nodes and dealing with additional buffer. It counts nodes and node arrays to avoid infinite loop. Bug: 6669677 Change-Id: I322e7263c0535e098635a1e5de098838de09467d
This commit is contained in:
parent
ab4c846b7f
commit
4d814bfcb7
5 changed files with 350 additions and 127 deletions
|
@ -74,6 +74,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
dictionary_structure_with_buffer_policy_factory.cpp \
|
||||
dynamic_patricia_trie_node_reader.cpp \
|
||||
dynamic_patricia_trie_policy.cpp \
|
||||
dynamic_patricia_trie_reading_helper.cpp \
|
||||
dynamic_patricia_trie_reading_utils.cpp \
|
||||
patricia_trie_policy.cpp \
|
||||
patricia_trie_reading_utils.cpp) \
|
||||
|
|
|
@ -20,95 +20,68 @@
|
|||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
|
||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const {
|
||||
if (!dicNode->hasChildren()) {
|
||||
return;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
int nextPos = dicNode->getChildrenPos();
|
||||
int totalChildCount = 0;
|
||||
do {
|
||||
const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||
mDictRoot, &nextPos);
|
||||
totalChildCount += childCount;
|
||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||
// Invalid dictionary.
|
||||
AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
|
||||
childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||
ASSERT(false);
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nextPos, MAX_WORD_LENGTH,
|
||||
mergedNodeCodePoints);
|
||||
if (!nodeReader.isDeleted()) {
|
||||
// Push child node when the node is not a deleted node.
|
||||
childDicNodes->pushLeavingChild(dicNode, nodeReader.getNodePos(),
|
||||
nodeReader.getChildrenPos(), nodeReader.getProbability(),
|
||||
nodeReader.isTerminal(), nodeReader.hasChildren(),
|
||||
nodeReader.isBlacklisted() || nodeReader.isNotAWord(),
|
||||
nodeReader.getCodePointCount(), mergedNodeCodePoints);
|
||||
}
|
||||
nextPos = nodeReader.getSiblingNodePos();
|
||||
}
|
||||
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos);
|
||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
while (!readingHelper.isEnd()) {
|
||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
|
||||
nodeReader->getChildrenPos(), nodeReader->getProbability(),
|
||||
nodeReader->isTerminal() && !nodeReader->isDeleted(),
|
||||
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
|
||||
nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
|
||||
readingHelper.readNextSiblingNode();
|
||||
}
|
||||
}
|
||||
|
||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const {
|
||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||
// node code points are stored in the buffer in the reverse order.
|
||||
int reverseCodePoints[maxCodePointCount];
|
||||
int mergedNodeCodePoints[maxCodePointCount];
|
||||
int codePointCount = 0;
|
||||
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
// First, read terminal node and get its probability.
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
// Store terminal node probability.
|
||||
*outUnigramProbability = nodeReader.getProbability();
|
||||
// Store terminal node code points to buffer in the reverse order.
|
||||
for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
|
||||
reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
// First, read the terminal node and get its probability.
|
||||
readingHelper.initWithNodePos(nodePos);
|
||||
if (!readingHelper.isValidTerminalNode()) {
|
||||
// Node at the nodePos is not a valid terminal node.
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
// Then, follow parent pos toward the root node.
|
||||
while (nodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||
// codePointCount must be incremented at least once in each iteration to ensure preventing
|
||||
// infinite loop.
|
||||
if (nodeReader.isDeleted() || codePointCount > maxCodePointCount
|
||||
|| nodeReader.getCodePointCount() <= 0) {
|
||||
// Store terminal node probability.
|
||||
*outUnigramProbability = readingHelper.getNodeReader()->getProbability();
|
||||
// Then, following parent node link to the dictionary root and fetch node code points.
|
||||
while (!readingHelper.isEnd()) {
|
||||
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
||||
// The nodePos is not a valid terminal node position in the dictionary.
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
// Read parent node.
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodeReader.getParentPos(),
|
||||
maxCodePointCount, mergedNodeCodePoints);
|
||||
// Store node code points to buffer in the reverse order.
|
||||
for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
|
||||
reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
|
||||
}
|
||||
readingHelper.fetchMergedNodeCodePointsInReverseOrder(
|
||||
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
|
||||
// Follow parent node toward the root node.
|
||||
readingHelper.readParentNode();
|
||||
}
|
||||
if (readingHelper.isError()) {
|
||||
// The node position or the dictionary is invalid.
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
// Reverse the stored code points to output them.
|
||||
const int codePointCount = readingHelper.getTotalCodePointCount();
|
||||
for (int i = 0; i < codePointCount; ++i) {
|
||||
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
|
||||
}
|
||||
|
@ -121,73 +94,39 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
|||
for (int i = 0; i < length; ++i) {
|
||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||
}
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
int currentLength = 0;
|
||||
int pos = getRootPosition();
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
while (currentLength < length) {
|
||||
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
||||
bool foundMatchedNode = false;
|
||||
int totalChildCount = 0;
|
||||
do {
|
||||
const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||
mDictRoot, &pos);
|
||||
totalChildCount += childCount;
|
||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||
// Invalid dictionary.
|
||||
AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
|
||||
childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||
ASSERT(false);
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||
while (!readingHelper.isEnd()) {
|
||||
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
||||
if (readingHelper.getTotalCodePointCount() > length
|
||||
|| !readingHelper.isMatchedCodePoint(0 /* index */,
|
||||
searchCodePoints[matchedCodePointCount])) {
|
||||
// Current node has too many code points or its first code point is different from
|
||||
// target code point. Skip this node and read the next sibling node.
|
||||
readingHelper.readNextSiblingNode();
|
||||
continue;
|
||||
}
|
||||
// Check following merged node code points.
|
||||
const int nodeCodePointCount = nodeReader->getCodePointCount();
|
||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||
if (!readingHelper.isMatchedCodePoint(
|
||||
j, searchCodePoints[matchedCodePointCount + j])) {
|
||||
// Different code point is found. The given word is not included in the dictionary.
|
||||
return NOT_A_VALID_WORD_POS;
|
||||
}
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH,
|
||||
mergedNodeCodePoints);
|
||||
const int nodeCodePointCount = nodeReader.getCodePointCount();
|
||||
if (nodeReader.isDeleted() || nodeCodePointCount <= 0
|
||||
|| currentLength + nodeCodePointCount > length) {
|
||||
// Skip deleted or empty node.
|
||||
pos = nodeReader.getSiblingNodePos();
|
||||
continue;
|
||||
}
|
||||
bool matched = true;
|
||||
for (int j = 0; j < nodeCodePointCount; ++j) {
|
||||
if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) {
|
||||
// Different code point is found.
|
||||
matched = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matched) {
|
||||
currentLength += nodeCodePointCount;
|
||||
if (length == currentLength) {
|
||||
// Terminal position is found.
|
||||
return nodeReader.getNodePos();
|
||||
}
|
||||
if (!nodeReader.hasChildren()) {
|
||||
return NOT_A_VALID_WORD_POS;
|
||||
}
|
||||
foundMatchedNode = true;
|
||||
// Advance to the children nodes.
|
||||
pos = nodeReader.getChildrenPos();
|
||||
break;
|
||||
}
|
||||
// Try next sibling node.
|
||||
pos = nodeReader.getSiblingNodePos();
|
||||
}
|
||||
if (foundMatchedNode) {
|
||||
break;
|
||||
}
|
||||
// If the matched node is not found in the current PtNode array, try to follow the
|
||||
// forward link.
|
||||
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
|
||||
mDictRoot, pos);
|
||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
|
||||
if (!foundMatchedNode) {
|
||||
// Matched node is not found.
|
||||
}
|
||||
// All characters are matched.
|
||||
if (length == readingHelper.getTotalCodePointCount()) {
|
||||
// Terminal position is found.
|
||||
return nodeReader->getNodePos();
|
||||
}
|
||||
if (!nodeReader->hasChildren()) {
|
||||
return NOT_A_VALID_WORD_POS;
|
||||
}
|
||||
// Advance to the children nodes.
|
||||
readingHelper.readChildNode();
|
||||
}
|
||||
// If we already traversed the tree further than the word is long, there means
|
||||
// there was no match (or we would have found it).
|
||||
|
|
|
@ -87,7 +87,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
|
||||
const MmappedBuffer *const mBuffer;
|
||||
const ExtendableBuffer mExtendableBuffer;
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||
|
||||
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||
// method to avoid an infinite loop.
|
||||
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
||||
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
||||
? mExtendableBuffer->getBuffer() : mDictRoot;
|
||||
if (usesAdditionalBuffer) {
|
||||
mPos -= mOriginalDictSize;
|
||||
}
|
||||
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
||||
&mPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
mPos += mOriginalDictSize;
|
||||
}
|
||||
// Count up nodes and node arrays to avoid infinite loop.
|
||||
mTotalNodeCount += mNodeCount;
|
||||
mNodeArrayCount++;
|
||||
if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||
|| mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||
// Invalid dictionary.
|
||||
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
||||
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
||||
mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
||||
mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||
ASSERT(false);
|
||||
mIsError = true;
|
||||
mPos = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
if (mNodeCount == 0) {
|
||||
// Empty node array. Try following forward link.
|
||||
followForwardLink();
|
||||
}
|
||||
}
|
||||
|
||||
// Follow the forward link and read the next node array if exists.
|
||||
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
||||
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
||||
? mExtendableBuffer->getBuffer() : mDictRoot;
|
||||
if (usesAdditionalBuffer) {
|
||||
mPos -= mOriginalDictSize;
|
||||
}
|
||||
const int forwardLinkPosition =
|
||||
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
mPos += mOriginalDictSize;
|
||||
}
|
||||
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||
// Follow the forward link.
|
||||
mPos = forwardLinkPosition;
|
||||
nextNodeArray();
|
||||
} else {
|
||||
// All node arrays have been read.
|
||||
mPos = NOT_A_DICT_POS;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
/*
|
||||
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
|
||||
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
|
||||
*/
|
||||
class DynamicPatriciaTrieReadingHelper {
|
||||
public:
|
||||
DynamicPatriciaTrieReadingHelper(const uint8_t *const dictRoot, const int originalDictSize,
|
||||
const ExtendableBuffer *const extendableBuffer,
|
||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||
mTotalNodeCount(0), mNodeArrayCount(0), mDictRoot(dictRoot),
|
||||
mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer),
|
||||
mNodeReader(mDictRoot, mOriginalDictSize, mExtendableBuffer, bigramsPolicy,
|
||||
shortcutsPolicy) {}
|
||||
|
||||
~DynamicPatriciaTrieReadingHelper() {}
|
||||
|
||||
AK_FORCE_INLINE bool isError() const {
|
||||
return mIsError;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isEnd() const {
|
||||
return mPos == NOT_A_DICT_POS;
|
||||
}
|
||||
|
||||
// Initialize reading state with the head position of a node array.
|
||||
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
||||
if (nodeArrayPos == NOT_A_DICT_POS) {
|
||||
mPos = NOT_A_DICT_POS;
|
||||
} else {
|
||||
mIsError = false;
|
||||
mPos = nodeArrayPos;
|
||||
mNodeCount = 0;
|
||||
mPrevTotalCodePointCount = 0;
|
||||
mTotalNodeCount = 0;
|
||||
mNodeArrayCount = 0;
|
||||
nextNodeArray();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize reading state with the head position of a node.
|
||||
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
||||
// TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS
|
||||
if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) {
|
||||
mPos = NOT_A_DICT_POS;
|
||||
} else {
|
||||
mIsError = false;
|
||||
mPos = nodePos;
|
||||
mNodeCount = 1;
|
||||
mPrevTotalCodePointCount = 0;
|
||||
mTotalNodeCount = 1;
|
||||
mNodeArrayCount = 1;
|
||||
fetchNodeInfo();
|
||||
}
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
|
||||
return &mNodeReader;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isValidTerminalNode() const {
|
||||
return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
|
||||
return mMergedNodeCodePoints[index] == codePoint;
|
||||
}
|
||||
|
||||
// Return code point count exclude the last read node's code points.
|
||||
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
||||
return mPrevTotalCodePointCount;
|
||||
}
|
||||
|
||||
// Return code point count include the last read node's code points.
|
||||
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
||||
return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
||||
const int index, int *const outCodePoints) const {
|
||||
const int nodeCodePointCount = mNodeReader.getCodePointCount();
|
||||
for (int i = 0; i < nodeCodePointCount; ++i) {
|
||||
outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
|
||||
}
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
|
||||
return mMergedNodeCodePoints;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void readNextSiblingNode() {
|
||||
mNodeCount -= 1;
|
||||
mPos = mNodeReader.getSiblingNodePos();
|
||||
if (mNodeCount <= 0) {
|
||||
// All nodes in the current node array have been read.
|
||||
followForwardLink();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
}
|
||||
} else {
|
||||
fetchNodeInfo();
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first child node of the current node.
|
||||
AK_FORCE_INLINE void readChildNode() {
|
||||
if (mNodeReader.hasChildren()) {
|
||||
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||
mTotalNodeCount = 0;
|
||||
mNodeArrayCount = 0;
|
||||
mPos = mNodeReader.getChildrenPos();
|
||||
// Read children node array.
|
||||
nextNodeArray();
|
||||
if (!isEnd()) {
|
||||
fetchNodeInfo();
|
||||
}
|
||||
} else {
|
||||
mPos = NOT_A_DICT_POS;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the parent node of the current node.
|
||||
AK_FORCE_INLINE void readParentNode() {
|
||||
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||
mTotalNodeCount = 1;
|
||||
mNodeArrayCount = 1;
|
||||
mNodeCount = 1;
|
||||
mPos = mNodeReader.getParentPos();
|
||||
fetchNodeInfo();
|
||||
} else {
|
||||
mPos = NOT_A_DICT_POS;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||
|
||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
|
||||
bool mIsError;
|
||||
int mPos;
|
||||
// Node count of a node array.
|
||||
int mNodeCount;
|
||||
int mPrevTotalCodePointCount;
|
||||
int mTotalNodeCount;
|
||||
int mNodeArrayCount;
|
||||
const uint8_t *const mDictRoot;
|
||||
const int mOriginalDictSize;
|
||||
const ExtendableBuffer *const mExtendableBuffer;
|
||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
|
||||
void nextNodeArray();
|
||||
|
||||
void followForwardLink();
|
||||
|
||||
AK_FORCE_INLINE void fetchNodeInfo() {
|
||||
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
|
||||
mMergedNodeCodePoints);
|
||||
if (mNodeReader.getCodePointCount() <= 0) {
|
||||
// Empty node is not allowed.
|
||||
mIsError = true;
|
||||
mPos = NOT_A_DICT_POS;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
|
Loading…
Reference in a new issue