am be76dbfe: Merge "Introduce DynamicPatriciaTrieReadingHelper."
* commit 'be76dbfef94db92e0199bf7fac8b9cda480955bf': Introduce DynamicPatriciaTrieReadingHelper.main
commit
bcb2c53266
|
@ -74,6 +74,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
dictionary_structure_with_buffer_policy_factory.cpp \
|
dictionary_structure_with_buffer_policy_factory.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
patricia_trie_policy.cpp \
|
patricia_trie_policy.cpp \
|
||||||
patricia_trie_reading_utils.cpp) \
|
patricia_trie_reading_utils.cpp) \
|
||||||
|
|
|
@ -20,95 +20,68 @@
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
|
||||||
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
readingHelper.initWithNodeArrayPos(dicNode->getChildrenPos());
|
||||||
int nextPos = dicNode->getChildrenPos();
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
int totalChildCount = 0;
|
while (!readingHelper.isEnd()) {
|
||||||
do {
|
childDicNodes->pushLeavingChild(dicNode, nodeReader->getNodePos(),
|
||||||
const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
nodeReader->getChildrenPos(), nodeReader->getProbability(),
|
||||||
mDictRoot, &nextPos);
|
nodeReader->isTerminal() && !nodeReader->isDeleted(),
|
||||||
totalChildCount += childCount;
|
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
|
||||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
|
||||||
// Invalid dictionary.
|
readingHelper.readNextSiblingNode();
|
||||||
AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
|
}
|
||||||
childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
|
|
||||||
ASSERT(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < childCount; i++) {
|
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nextPos, MAX_WORD_LENGTH,
|
|
||||||
mergedNodeCodePoints);
|
|
||||||
if (!nodeReader.isDeleted()) {
|
|
||||||
// Push child node when the node is not a deleted node.
|
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodeReader.getNodePos(),
|
|
||||||
nodeReader.getChildrenPos(), nodeReader.getProbability(),
|
|
||||||
nodeReader.isTerminal(), nodeReader.hasChildren(),
|
|
||||||
nodeReader.isBlacklisted() || nodeReader.isNotAWord(),
|
|
||||||
nodeReader.getCodePointCount(), mergedNodeCodePoints);
|
|
||||||
}
|
|
||||||
nextPos = nodeReader.getSiblingNodePos();
|
|
||||||
}
|
|
||||||
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos);
|
|
||||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||||
// node code points are stored in the buffer in the reverse order.
|
// node code points are stored in the buffer in the reverse order.
|
||||||
int reverseCodePoints[maxCodePointCount];
|
int reverseCodePoints[maxCodePointCount];
|
||||||
int mergedNodeCodePoints[maxCodePointCount];
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||||
int codePointCount = 0;
|
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
|
// First, read the terminal node and get its probability.
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
readingHelper.initWithNodePos(nodePos);
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
if (!readingHelper.isValidTerminalNode()) {
|
||||||
// First, read terminal node and get its probability.
|
// Node at the nodePos is not a valid terminal node.
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
mergedNodeCodePoints);
|
return 0;
|
||||||
// Store terminal node probability.
|
|
||||||
*outUnigramProbability = nodeReader.getProbability();
|
|
||||||
// Store terminal node code points to buffer in the reverse order.
|
|
||||||
for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
|
|
||||||
reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
|
|
||||||
}
|
}
|
||||||
// Then, follow parent pos toward the root node.
|
// Store terminal node probability.
|
||||||
while (nodeReader.getParentPos() != NOT_A_DICT_POS) {
|
*outUnigramProbability = readingHelper.getNodeReader()->getProbability();
|
||||||
// codePointCount must be incremented at least once in each iteration to ensure preventing
|
// Then, follow the parent node links toward the dictionary root and fetch node code points.
|
||||||
// infinite loop.
|
while (!readingHelper.isEnd()) {
|
||||||
if (nodeReader.isDeleted() || codePointCount > maxCodePointCount
|
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
||||||
|| nodeReader.getCodePointCount() <= 0) {
|
|
||||||
// The nodePos is not a valid terminal node position in the dictionary.
|
// The nodePos is not a valid terminal node position in the dictionary.
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Read parent node.
|
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodeReader.getParentPos(),
|
|
||||||
maxCodePointCount, mergedNodeCodePoints);
|
|
||||||
// Store node code points to buffer in the reverse order.
|
// Store node code points to buffer in the reverse order.
|
||||||
for (int i = nodeReader.getCodePointCount() - 1; i >= 0; --i) {
|
readingHelper.fetchMergedNodeCodePointsInReverseOrder(
|
||||||
reverseCodePoints[codePointCount++] = mergedNodeCodePoints[i];
|
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
|
||||||
}
|
// Follow parent node toward the root node.
|
||||||
|
readingHelper.readParentNode();
|
||||||
|
}
|
||||||
|
if (readingHelper.isError()) {
|
||||||
|
// The node position or the dictionary is invalid.
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
// Reverse the stored code points to output them.
|
// Reverse the stored code points to output them.
|
||||||
|
const int codePointCount = readingHelper.getTotalCodePointCount();
|
||||||
for (int i = 0; i < codePointCount; ++i) {
|
for (int i = 0; i < codePointCount; ++i) {
|
||||||
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
|
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
|
||||||
}
|
}
|
||||||
|
@ -121,73 +94,39 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||||
}
|
}
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
DynamicPatriciaTrieReadingHelper readingHelper(mDictRoot, mOriginalDictSize,
|
||||||
int currentLength = 0;
|
&mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
int pos = getRootPosition();
|
readingHelper.initWithNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer,
|
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
while (!readingHelper.isEnd()) {
|
||||||
while (currentLength < length) {
|
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
||||||
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
if (readingHelper.getTotalCodePointCount() > length
|
||||||
bool foundMatchedNode = false;
|
|| !readingHelper.isMatchedCodePoint(0 /* index */,
|
||||||
int totalChildCount = 0;
|
searchCodePoints[matchedCodePointCount])) {
|
||||||
do {
|
// Current node has too many code points or its first code point is different from
|
||||||
const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
// target code point. Skip this node and read the next sibling node.
|
||||||
mDictRoot, &pos);
|
readingHelper.readNextSiblingNode();
|
||||||
totalChildCount += childCount;
|
continue;
|
||||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
}
|
||||||
// Invalid dictionary.
|
// Check following merged node code points.
|
||||||
AKLOGI("Invalid dictionary. childCount: %d, totalChildCount: %d, MAX: %d",
|
const int nodeCodePointCount = nodeReader->getCodePointCount();
|
||||||
childCount, totalChildCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP);
|
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||||
ASSERT(false);
|
if (!readingHelper.isMatchedCodePoint(
|
||||||
|
j, searchCodePoints[matchedCodePointCount + j])) {
|
||||||
|
// Different code point is found. The given word is not included in the dictionary.
|
||||||
return NOT_A_VALID_WORD_POS;
|
return NOT_A_VALID_WORD_POS;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < childCount; i++) {
|
}
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(pos, MAX_WORD_LENGTH,
|
// All characters are matched.
|
||||||
mergedNodeCodePoints);
|
if (length == readingHelper.getTotalCodePointCount()) {
|
||||||
const int nodeCodePointCount = nodeReader.getCodePointCount();
|
// Terminal position is found.
|
||||||
if (nodeReader.isDeleted() || nodeCodePointCount <= 0
|
return nodeReader->getNodePos();
|
||||||
|| currentLength + nodeCodePointCount > length) {
|
}
|
||||||
// Skip deleted or empty node.
|
if (!nodeReader->hasChildren()) {
|
||||||
pos = nodeReader.getSiblingNodePos();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bool matched = true;
|
|
||||||
for (int j = 0; j < nodeCodePointCount; ++j) {
|
|
||||||
if (mergedNodeCodePoints[j] != searchCodePoints[currentLength + j]) {
|
|
||||||
// Different code point is found.
|
|
||||||
matched = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (matched) {
|
|
||||||
currentLength += nodeCodePointCount;
|
|
||||||
if (length == currentLength) {
|
|
||||||
// Terminal position is found.
|
|
||||||
return nodeReader.getNodePos();
|
|
||||||
}
|
|
||||||
if (!nodeReader.hasChildren()) {
|
|
||||||
return NOT_A_VALID_WORD_POS;
|
|
||||||
}
|
|
||||||
foundMatchedNode = true;
|
|
||||||
// Advance to the children nodes.
|
|
||||||
pos = nodeReader.getChildrenPos();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Try next sibling node.
|
|
||||||
pos = nodeReader.getSiblingNodePos();
|
|
||||||
}
|
|
||||||
if (foundMatchedNode) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// If the matched node is not found in the current PtNode array, try to follow the
|
|
||||||
// forward link.
|
|
||||||
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
|
|
||||||
mDictRoot, pos);
|
|
||||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
|
|
||||||
if (!foundMatchedNode) {
|
|
||||||
// Matched node is not found.
|
|
||||||
return NOT_A_VALID_WORD_POS;
|
return NOT_A_VALID_WORD_POS;
|
||||||
}
|
}
|
||||||
|
// Advance to the children nodes.
|
||||||
|
readingHelper.readChildNode();
|
||||||
}
|
}
|
||||||
// If we already traversed the tree further than the word is long, this means
|
// If we already traversed the tree further than the word is long, this means
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
|
|
|
@ -87,7 +87,6 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const ExtendableBuffer mExtendableBuffer;
|
const ExtendableBuffer mExtendableBuffer;
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// To avoid infinite loops caused by invalid or malicious forward links.
|
||||||
|
const int DynamicPatriciaTrieReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
|
const int DynamicPatriciaTrieReadingHelper::MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
|
|
||||||
|
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
||||||
|
// method to avoid an infinite loop.
|
||||||
|
void DynamicPatriciaTrieReadingHelper::nextNodeArray() {
|
||||||
|
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
||||||
|
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
||||||
|
? mExtendableBuffer->getBuffer() : mDictRoot;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
mPos -= mOriginalDictSize;
|
||||||
|
}
|
||||||
|
mNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(dictBuf,
|
||||||
|
&mPos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
mPos += mOriginalDictSize;
|
||||||
|
}
|
||||||
|
// Count up nodes and node arrays to avoid infinite loop.
|
||||||
|
mTotalNodeCount += mNodeCount;
|
||||||
|
mNodeArrayCount++;
|
||||||
|
if (mNodeCount < 0 || mTotalNodeCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
||||||
|
|| mNodeArrayCount > MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||||
|
// Invalid dictionary.
|
||||||
|
AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
||||||
|
"nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
||||||
|
mNodeCount, mTotalNodeCount, MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
||||||
|
mNodeArrayCount, MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
||||||
|
ASSERT(false);
|
||||||
|
mIsError = true;
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (mNodeCount == 0) {
|
||||||
|
// Empty node array. Try following forward link.
|
||||||
|
followForwardLink();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Follow the forward link and read the next node array if exists.
|
||||||
|
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
|
||||||
|
const bool usesAdditionalBuffer = mPos >= mOriginalDictSize;
|
||||||
|
const uint8_t *const dictBuf = (usesAdditionalBuffer)
|
||||||
|
? mExtendableBuffer->getBuffer() : mDictRoot;
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
mPos -= mOriginalDictSize;
|
||||||
|
}
|
||||||
|
const int forwardLinkPosition =
|
||||||
|
DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(dictBuf, mPos);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
mPos += mOriginalDictSize;
|
||||||
|
}
|
||||||
|
if (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(forwardLinkPosition)) {
|
||||||
|
// Follow the forward link.
|
||||||
|
mPos = forwardLinkPosition;
|
||||||
|
nextNodeArray();
|
||||||
|
} else {
|
||||||
|
// All node arrays have been read.
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,201 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||||
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This class is used for traversing a dynamic patricia trie. This class supports iterating nodes and
|
||||||
|
* dealing with the additional buffer. This class counts nodes and node arrays to avoid infinite loops.
|
||||||
|
*/
|
||||||
|
class DynamicPatriciaTrieReadingHelper {
|
||||||
|
public:
|
||||||
|
DynamicPatriciaTrieReadingHelper(const uint8_t *const dictRoot, const int originalDictSize,
|
||||||
|
const ExtendableBuffer *const extendableBuffer,
|
||||||
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
|
: mIsError(false), mPos(NOT_A_DICT_POS), mNodeCount(0), mPrevTotalCodePointCount(0),
|
||||||
|
mTotalNodeCount(0), mNodeArrayCount(0), mDictRoot(dictRoot),
|
||||||
|
mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer),
|
||||||
|
mNodeReader(mDictRoot, mOriginalDictSize, mExtendableBuffer, bigramsPolicy,
|
||||||
|
shortcutsPolicy) {}
|
||||||
|
|
||||||
|
~DynamicPatriciaTrieReadingHelper() {}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isError() const {
|
||||||
|
return mIsError;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isEnd() const {
|
||||||
|
return mPos == NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize reading state with the head position of a node array.
|
||||||
|
AK_FORCE_INLINE void initWithNodeArrayPos(const int nodeArrayPos) {
|
||||||
|
if (nodeArrayPos == NOT_A_DICT_POS) {
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
} else {
|
||||||
|
mIsError = false;
|
||||||
|
mPos = nodeArrayPos;
|
||||||
|
mNodeCount = 0;
|
||||||
|
mPrevTotalCodePointCount = 0;
|
||||||
|
mTotalNodeCount = 0;
|
||||||
|
mNodeArrayCount = 0;
|
||||||
|
nextNodeArray();
|
||||||
|
if (!isEnd()) {
|
||||||
|
fetchNodeInfo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize reading state with the head position of a node.
|
||||||
|
AK_FORCE_INLINE void initWithNodePos(const int nodePos) {
|
||||||
|
// TODO: Consolidate NOT_A_VALID_WORD_POS and NOT_A_DICT_POS
|
||||||
|
if (nodePos == NOT_A_VALID_WORD_POS || nodePos == NOT_A_DICT_POS) {
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
} else {
|
||||||
|
mIsError = false;
|
||||||
|
mPos = nodePos;
|
||||||
|
mNodeCount = 1;
|
||||||
|
mPrevTotalCodePointCount = 0;
|
||||||
|
mTotalNodeCount = 1;
|
||||||
|
mNodeArrayCount = 1;
|
||||||
|
fetchNodeInfo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
|
||||||
|
return &mNodeReader;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isValidTerminalNode() const {
|
||||||
|
return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
|
||||||
|
return mMergedNodeCodePoints[index] == codePoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return code point count exclude the last read node's code points.
|
||||||
|
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
||||||
|
return mPrevTotalCodePointCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return code point count include the last read node's code points.
|
||||||
|
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
||||||
|
return mPrevTotalCodePointCount + mNodeReader.getCodePointCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
||||||
|
const int index, int *const outCodePoints) const {
|
||||||
|
const int nodeCodePointCount = mNodeReader.getCodePointCount();
|
||||||
|
for (int i = 0; i < nodeCodePointCount; ++i) {
|
||||||
|
outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
|
||||||
|
return mMergedNodeCodePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void readNextSiblingNode() {
|
||||||
|
mNodeCount -= 1;
|
||||||
|
mPos = mNodeReader.getSiblingNodePos();
|
||||||
|
if (mNodeCount <= 0) {
|
||||||
|
// All nodes in the current node array have been read.
|
||||||
|
followForwardLink();
|
||||||
|
if (!isEnd()) {
|
||||||
|
fetchNodeInfo();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fetchNodeInfo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the first child node of the current node.
|
||||||
|
AK_FORCE_INLINE void readChildNode() {
|
||||||
|
if (mNodeReader.hasChildren()) {
|
||||||
|
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||||
|
mTotalNodeCount = 0;
|
||||||
|
mNodeArrayCount = 0;
|
||||||
|
mPos = mNodeReader.getChildrenPos();
|
||||||
|
// Read children node array.
|
||||||
|
nextNodeArray();
|
||||||
|
if (!isEnd()) {
|
||||||
|
fetchNodeInfo();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the parent node of the current node.
|
||||||
|
AK_FORCE_INLINE void readParentNode() {
|
||||||
|
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
||||||
|
mPrevTotalCodePointCount += mNodeReader.getCodePointCount();
|
||||||
|
mTotalNodeCount = 1;
|
||||||
|
mNodeArrayCount = 1;
|
||||||
|
mNodeCount = 1;
|
||||||
|
mPos = mNodeReader.getParentPos();
|
||||||
|
fetchNodeInfo();
|
||||||
|
} else {
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieReadingHelper);
|
||||||
|
|
||||||
|
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
|
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
|
|
||||||
|
bool mIsError;
|
||||||
|
int mPos;
|
||||||
|
// Node count of a node array.
|
||||||
|
int mNodeCount;
|
||||||
|
int mPrevTotalCodePointCount;
|
||||||
|
int mTotalNodeCount;
|
||||||
|
int mNodeArrayCount;
|
||||||
|
const uint8_t *const mDictRoot;
|
||||||
|
const int mOriginalDictSize;
|
||||||
|
const ExtendableBuffer *const mExtendableBuffer;
|
||||||
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
|
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
|
|
||||||
|
void nextNodeArray();
|
||||||
|
|
||||||
|
void followForwardLink();
|
||||||
|
|
||||||
|
AK_FORCE_INLINE void fetchNodeInfo() {
|
||||||
|
mNodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(mPos, MAX_WORD_LENGTH,
|
||||||
|
mMergedNodeCodePoints);
|
||||||
|
if (mNodeReader.getCodePointCount() <= 0) {
|
||||||
|
// Empty node is not allowed.
|
||||||
|
mIsError = true;
|
||||||
|
mPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
|
Loading…
Reference in New Issue