Refactoring: Introduce PtNode and PtNodeReader.
To handle multiple dictionary formats in helpers. Bug: 11073222 Change-Id: Iaef7be08534f9010e837ffcf8c8292b174b64d2b
This commit is contained in:
parent
75d7f0fbf5
commit
c481d0556f
15 changed files with 528 additions and 436 deletions
|
@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||||
}
|
}
|
||||||
const int bigramTargetNodePos =
|
const int bigramTargetNodePos =
|
||||||
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
|
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
|
||||||
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|
bigramTargetNodePos));
|
||||||
|
if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|
||||||
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
||||||
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||||
|
@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
int currentPos = originalBigramPos;
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
int currentPos = NOT_A_DICT_POS;
|
||||||
int bigramLinkCount = 0;
|
int bigramLinkCount = 0;
|
||||||
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) {
|
int bigramLinkedNodePos = originalBigramPos;
|
||||||
currentPos = nodeReader.getBigramLinkedNodePos();
|
do {
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos);
|
currentPos = bigramLinkedNodePos;
|
||||||
|
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
|
||||||
|
bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
|
||||||
bigramLinkCount++;
|
bigramLinkCount++;
|
||||||
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
|
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
|
||||||
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
|
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
|
||||||
|
} while (bigramLinkedNodePos != NOT_A_DICT_POS);
|
||||||
return currentPos;
|
return currentPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,185 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_PT_NODE_PARAMS_H
|
||||||
|
#define LATINIME_PT_NODE_PARAMS_H
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// This class has information of a PtNode. This class is immutable.
|
||||||
|
class PtNodeParams {
|
||||||
|
public:
|
||||||
|
// Invalid PtNode.
|
||||||
|
PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
|
||||||
|
mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL), mProbabilityFieldPos(NOT_A_DICT_POS),
|
||||||
|
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
||||||
|
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
||||||
|
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
||||||
|
mSiblingPos(NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
|
PtNodeParams(const PtNodeParams& ptNodeParams)
|
||||||
|
: mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
|
||||||
|
mParentPos(ptNodeParams.mParentPos), mCodePointCount(ptNodeParams.mCodePointCount),
|
||||||
|
mCodePoints(), mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
|
||||||
|
mTerminalId(ptNodeParams.mTerminalId),
|
||||||
|
mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
|
||||||
|
mProbability(ptNodeParams.mProbability),
|
||||||
|
mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
|
||||||
|
mChildrenPos(ptNodeParams.mChildrenPos),
|
||||||
|
mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
|
||||||
|
mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
|
||||||
|
mSiblingPos(ptNodeParams.mSiblingPos) {
|
||||||
|
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
|
||||||
|
const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
|
||||||
|
const int bigramPos, const int siblingPos)
|
||||||
|
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
||||||
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
|
mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
|
||||||
|
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
|
||||||
|
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
|
||||||
|
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
|
||||||
|
mBigramPos(bigramPos), mSiblingPos(siblingPos) {
|
||||||
|
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isValid() const {
|
||||||
|
return mCodePointCount > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Head position of the PtNode
|
||||||
|
AK_FORCE_INLINE int getHeadPos() const {
|
||||||
|
return mHeadPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flags
|
||||||
|
AK_FORCE_INLINE bool isDeleted() const {
|
||||||
|
return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool hasChildren() const {
|
||||||
|
return mChildrenPos != NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isTerminal() const {
|
||||||
|
return PatriciaTrieReadingUtils::isTerminal(mFlags);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isBlacklisted() const {
|
||||||
|
return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE bool isNotAWord() const {
|
||||||
|
return PatriciaTrieReadingUtils::isNotAWord(mFlags);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parent node position
|
||||||
|
AK_FORCE_INLINE int getParentPos() const {
|
||||||
|
return mParentPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of code points
|
||||||
|
AK_FORCE_INLINE uint8_t getCodePointCount() const {
|
||||||
|
return mCodePointCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const int *getCodePoints() const {
|
||||||
|
return mCodePoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Probability
|
||||||
|
AK_FORCE_INLINE int getTerminalIdFieldPos() const {
|
||||||
|
return mTerminalIdFieldPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getTerminalId() const {
|
||||||
|
return mTerminalId;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Probability
|
||||||
|
AK_FORCE_INLINE int getProbabilityFieldPos() const {
|
||||||
|
return mProbabilityFieldPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getProbability() const {
|
||||||
|
return mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Children PtNode array position
|
||||||
|
AK_FORCE_INLINE int getChildrenPosFieldPos() const {
|
||||||
|
return mChildrenPosFieldPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE int getChildrenPos() const {
|
||||||
|
return mChildrenPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bigram linked node position.
|
||||||
|
AK_FORCE_INLINE int getBigramLinkedNodePos() const {
|
||||||
|
return mBigramLinkedNodePos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shortcutlist position
|
||||||
|
AK_FORCE_INLINE int getShortcutPos() const {
|
||||||
|
return mShortcutPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bigrams position
|
||||||
|
AK_FORCE_INLINE int getBigramsPos() const {
|
||||||
|
return mBigramPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sibling node position
|
||||||
|
AK_FORCE_INLINE int getSiblingNodePos() const {
|
||||||
|
return mSiblingPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// This class have a public copy constructor to be used as a return value.
|
||||||
|
|
||||||
|
// Disallowing the assignment operator.
|
||||||
|
PtNodeParams &operator=(PtNodeParams &ptNodeParams);
|
||||||
|
|
||||||
|
const int mHeadPos;
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags mFlags;
|
||||||
|
const int mParentPos;
|
||||||
|
const uint8_t mCodePointCount;
|
||||||
|
int mCodePoints[MAX_WORD_LENGTH];
|
||||||
|
const int mTerminalIdFieldPos;
|
||||||
|
const int mTerminalId;
|
||||||
|
const int mProbabilityFieldPos;
|
||||||
|
const int mProbability;
|
||||||
|
const int mChildrenPosFieldPos;
|
||||||
|
const int mChildrenPos;
|
||||||
|
const int mBigramLinkedNodePos;
|
||||||
|
const int mShortcutPos;
|
||||||
|
const int mBigramPos;
|
||||||
|
const int mSiblingPos;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_PT_NODE_PARAMS_H */
|
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_PT_NODE_READER_H
|
||||||
|
#define LATINIME_PT_NODE_READER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Interface class used to read PtNode information.
|
||||||
|
class PtNodeReader {
|
||||||
|
public:
|
||||||
|
virtual ~PtNodeReader() {}
|
||||||
|
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
PtNodeReader() {};
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_PT_NODE_READER_H */
|
|
@ -17,22 +17,22 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
bool DynamicPatriciaTrieGcEventListeners
|
bool DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||||
const int *const nodeCodePoints) {
|
|
||||||
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||||
// children.
|
// children.
|
||||||
bool isUselessPtNode = !node->isTerminal();
|
bool isUselessPtNode = !ptNodeParams->isTerminal();
|
||||||
if (node->isTerminal() && mIsDecayingDict) {
|
if (ptNodeParams->isTerminal() && mIsDecayingDict) {
|
||||||
const int newProbability =
|
const int newProbability =
|
||||||
ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
|
ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(),
|
||||||
mHeaderPolicy);
|
mHeaderPolicy);
|
||||||
int writingPos = node->getProbabilityFieldPos();
|
int writingPos = ptNodeParams->getProbabilityFieldPos();
|
||||||
// Update probability.
|
// Update probability.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
||||||
mBuffer, newProbability, &writingPos)) {
|
mBuffer, newProbability, &writingPos)) {
|
||||||
|
@ -44,9 +44,9 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
}
|
}
|
||||||
if (mChildrenValue > 0) {
|
if (mChildrenValue > 0) {
|
||||||
isUselessPtNode = false;
|
isUselessPtNode = false;
|
||||||
} else if (node->isTerminal()) {
|
} else if (ptNodeParams->isTerminal()) {
|
||||||
// Remove children as all children are useless.
|
// Remove children as all children are useless.
|
||||||
int writingPos = node->getChildrenPosFieldPos();
|
int writingPos = ptNodeParams->getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
|
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -54,12 +54,12 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
}
|
}
|
||||||
if (isUselessPtNode) {
|
if (isUselessPtNode) {
|
||||||
// Current PtNode is no longer needed. Mark it as deleted.
|
// Current PtNode is no longer needed. Mark it as deleted.
|
||||||
if (!mWritingHelper->markNodeAsDeleted(node)) {
|
if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mValueStack.back() += 1;
|
mValueStack.back() += 1;
|
||||||
if (node->isTerminal()) {
|
if (ptNodeParams->isTerminal()) {
|
||||||
mValidUnigramCount += 1;
|
mValidUnigramCount += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,10 +67,9 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||||
const int *const nodeCodePoints) {
|
if (!ptNodeParams->isDeleted()) {
|
||||||
if (!node->isDeleted()) {
|
int pos = ptNodeParams->getBigramsPos();
|
||||||
int pos = node->getBigramsPos();
|
|
||||||
if (pos != NOT_A_DICT_POS) {
|
if (pos != NOT_A_DICT_POS) {
|
||||||
int bigramEntryCount = 0;
|
int bigramEntryCount = 0;
|
||||||
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
|
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
|
||||||
|
@ -117,31 +116,29 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
|
||||||
|
|
||||||
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
|
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
|
||||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||||
const int *const nodeCodePoints) {
|
if (ptNodeParams->isDeleted()) {
|
||||||
if (node->isDeleted()) {
|
|
||||||
// Current PtNode is not written in new buffer because it has been deleted.
|
// Current PtNode is not written in new buffer because it has been deleted.
|
||||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
||||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
||||||
node->getHeadPos(), NOT_A_DICT_POS));
|
ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
int writingPos = mBufferToWrite->getTailPosition();
|
int writingPos = mBufferToWrite->getTailPosition();
|
||||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
||||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
||||||
node->getHeadPos(), writingPos));
|
ptNodeParams->getHeadPos(), writingPos));
|
||||||
mValidPtNodeCount++;
|
mValidPtNodeCount++;
|
||||||
// Writes current PtNode.
|
// Writes current PtNode.
|
||||||
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
|
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams,
|
||||||
node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
|
ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(),
|
||||||
node->getProbability(), &writingPos);
|
ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||||
const int *const nodeCodePoints) {
|
|
||||||
// Updates parent position.
|
// Updates parent position.
|
||||||
int parentPos = node->getParentPos();
|
int parentPos = ptNodeParams->getParentPos();
|
||||||
if (parentPos != NOT_A_DICT_POS) {
|
if (parentPos != NOT_A_DICT_POS) {
|
||||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
|
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
|
||||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
||||||
|
@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
|
||||||
parentPos = it->second;
|
parentPos = it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
int writingPos = ptNodeParams->getHeadPos()
|
||||||
|
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||||
// Write updated parent offset.
|
// Write updated parent offset.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
|
||||||
parentPos, node->getHeadPos(), &writingPos)) {
|
parentPos, ptNodeParams->getHeadPos(), &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Updates children position.
|
// Updates children position.
|
||||||
int childrenPos = node->getChildrenPos();
|
int childrenPos = ptNodeParams->getChildrenPos();
|
||||||
if (childrenPos != NOT_A_DICT_POS) {
|
if (childrenPos != NOT_A_DICT_POS) {
|
||||||
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
|
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
|
||||||
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
||||||
|
@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
|
||||||
childrenPos = it->second;
|
childrenPos = it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writingPos = node->getChildrenPosFieldPos();
|
writingPos = ptNodeParams->getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
|
||||||
childrenPos, &writingPos)) {
|
childrenPos, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Updates bigram target PtNode positions in the bigram list.
|
// Updates bigram target PtNode positions in the bigram list.
|
||||||
int bigramsPos = node->getBigramsPos();
|
int bigramsPos = ptNodeParams->getBigramsPos();
|
||||||
if (bigramsPos != NOT_A_DICT_POS) {
|
if (bigramsPos != NOT_A_DICT_POS) {
|
||||||
int bigramEntryCount;
|
int bigramEntryCount;
|
||||||
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
|
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
|
||||||
|
@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
|
||||||
}
|
}
|
||||||
mBigramCount += bigramEntryCount;
|
mBigramCount += bigramEntryCount;
|
||||||
}
|
}
|
||||||
if (node->isTerminal()) {
|
if (ptNodeParams->isTerminal()) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class DictionaryHeaderStructurePolicy;
|
class DictionaryHeaderStructurePolicy;
|
||||||
|
class PtNodeParams;
|
||||||
|
|
||||||
class DynamicPatriciaTrieGcEventListeners {
|
class DynamicPatriciaTrieGcEventListeners {
|
||||||
public:
|
public:
|
||||||
|
@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
bool onReadingPtNodeArrayTail() { return true; }
|
bool onReadingPtNodeArrayTail() { return true; }
|
||||||
|
|
||||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
|
||||||
const int *const nodeCodePoints);
|
|
||||||
|
|
||||||
int getValidUnigramCount() const {
|
int getValidUnigramCount() const {
|
||||||
return mValidUnigramCount;
|
return mValidUnigramCount;
|
||||||
|
@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
bool onReadingPtNodeArrayTail() { return true; }
|
bool onReadingPtNodeArrayTail() { return true; }
|
||||||
|
|
||||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
|
||||||
const int *const nodeCodePoints);
|
|
||||||
|
|
||||||
int getValidBigramEntryCount() const {
|
int getValidBigramEntryCount() const {
|
||||||
return mValidBigramEntryCount;
|
return mValidBigramEntryCount;
|
||||||
|
@ -133,8 +132,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
bool onReadingPtNodeArrayTail();
|
bool onReadingPtNodeArrayTail();
|
||||||
|
|
||||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
|
||||||
const int *const nodeCodePoints);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
|
||||||
|
@ -167,8 +165,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
bool onReadingPtNodeArrayTail() { return true; }
|
bool onReadingPtNodeArrayTail() { return true; }
|
||||||
|
|
||||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
|
||||||
const int *const nodeCodePoints);
|
|
||||||
|
|
||||||
int getUnigramCount() const {
|
int getUnigramCount() const {
|
||||||
return mUnigramCount;
|
return mUnigramCount;
|
||||||
|
|
|
@ -18,107 +18,90 @@
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
|
const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
|
||||||
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
|
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
|
||||||
// Reading invalid position because of bug or broken dictionary.
|
// Reading invalid position because of bug or broken dictionary.
|
||||||
AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
|
AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
|
||||||
ptNodePos, mBuffer->getTailPosition());
|
ptNodePos, mBuffer->getTailPosition());
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
invalidatePtNodeInfo();
|
return PtNodeParams();
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
int pos = ptNodePos;
|
int pos = ptNodePos;
|
||||||
mHeadPos = ptNodePos;
|
const int headPos = ptNodePos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
const int parentPosOffset =
|
const int parentPosOffset =
|
||||||
DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
|
DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
|
||||||
&pos);
|
&pos);
|
||||||
mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos);
|
const int parentPos =
|
||||||
if (outCodePoints != 0) {
|
DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
|
||||||
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
int codePoints[MAX_WORD_LENGTH];
|
||||||
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos);
|
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||||
} else {
|
dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
|
||||||
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters(
|
int probability = NOT_A_PROBABILITY;
|
||||||
dictBuf, mFlags, MAX_WORD_LENGTH, &pos);
|
int probabilityFieldPos = NOT_A_DICT_POS;
|
||||||
}
|
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
if (isTerminal()) {
|
probabilityFieldPos = pos;
|
||||||
mProbabilityFieldPos = pos;
|
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mProbabilityFieldPos += mBuffer->getOriginalBufferSize();
|
probabilityFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
|
probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
|
||||||
} else {
|
|
||||||
mProbabilityFieldPos = NOT_A_DICT_POS;
|
|
||||||
mProbability = NOT_A_PROBABILITY;
|
|
||||||
}
|
}
|
||||||
mChildrenPosFieldPos = pos;
|
int childrenPosFieldPos = pos;
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
mChildrenPosFieldPos += mBuffer->getOriginalBufferSize();
|
childrenPosFieldPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||||
dictBuf, &pos);
|
dictBuf, &pos);
|
||||||
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
|
||||||
mChildrenPos += mBuffer->getOriginalBufferSize();
|
childrenPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (mSiblingPos == NOT_A_DICT_POS) {
|
int newBigramLinkedNodePos = bigramLinkedNodePos;
|
||||||
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
if (siblingNodePos == NOT_A_DICT_POS) {
|
||||||
mBigramLinkedNodePos = mChildrenPos;
|
if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
|
||||||
} else {
|
newBigramLinkedNodePos = childrenPos;
|
||||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
pos += mBuffer->getOriginalBufferSize();
|
pos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) {
|
int shortcutsPos = NOT_A_DICT_POS;
|
||||||
mShortcutPos = pos;
|
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||||
|
shortcutsPos = pos;
|
||||||
mShortcutsPolicy->skipAllShortcuts(&pos);
|
mShortcutsPolicy->skipAllShortcuts(&pos);
|
||||||
} else {
|
|
||||||
mShortcutPos = NOT_A_DICT_POS;
|
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
|
int bigramsPos = NOT_A_DICT_POS;
|
||||||
mBigramPos = pos;
|
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
|
bigramsPos = pos;
|
||||||
mBigramsPolicy->skipAllBigrams(&pos);
|
mBigramsPolicy->skipAllBigrams(&pos);
|
||||||
} else {
|
|
||||||
mBigramPos = NOT_A_DICT_POS;
|
|
||||||
}
|
}
|
||||||
// Update siblingPos if needed.
|
int newSiblingNodePos = siblingNodePos;
|
||||||
if (mSiblingPos == NOT_A_DICT_POS) {
|
if (siblingNodePos == NOT_A_DICT_POS) {
|
||||||
// Sibling position is the tail position of current node.
|
// Sibling position is the tail position of current node.
|
||||||
mSiblingPos = pos;
|
newSiblingNodePos = pos;
|
||||||
}
|
}
|
||||||
// Read destination node if the read node is a moved node.
|
// Read destination node if the read node is a moved node.
|
||||||
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) {
|
if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
|
||||||
// The destination position is stored at the same place as the parent position.
|
// The destination position is stored at the same place as the parent position.
|
||||||
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount,
|
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
|
||||||
outCodePoints);
|
newBigramLinkedNodePos);
|
||||||
|
} else {
|
||||||
|
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
|
||||||
|
probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
|
||||||
|
newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
|
|
||||||
mHeadPos = NOT_A_DICT_POS;
|
|
||||||
mFlags = 0;
|
|
||||||
mParentPos = NOT_A_DICT_POS;
|
|
||||||
mCodePointCount = 0;
|
|
||||||
mProbabilityFieldPos = NOT_A_DICT_POS;
|
|
||||||
mProbability = NOT_A_PROBABILITY;
|
|
||||||
mChildrenPosFieldPos = NOT_A_DICT_POS;
|
|
||||||
mChildrenPos = NOT_A_DICT_POS;
|
|
||||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
|
||||||
mShortcutPos = NOT_A_DICT_POS;
|
|
||||||
mBigramPos = NOT_A_DICT_POS;
|
|
||||||
mSiblingPos = NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,8 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -33,106 +33,19 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
||||||
* node and reads node attributes.
|
* node and reads node attributes.
|
||||||
*/
|
*/
|
||||||
class DynamicPatriciaTrieNodeReader {
|
class DynamicPatriciaTrieNodeReader : public PtNodeReader {
|
||||||
public:
|
public:
|
||||||
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
||||||
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
||||||
mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0),
|
mShortcutsPolicy(shortcutsPolicy) {}
|
||||||
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
|
|
||||||
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
|
|
||||||
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
|
|
||||||
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
|
||||||
mSiblingPos(NOT_A_DICT_POS) {}
|
|
||||||
|
|
||||||
~DynamicPatriciaTrieNodeReader() {}
|
~DynamicPatriciaTrieNodeReader() {}
|
||||||
|
|
||||||
// Reads PtNode information from dictionary buffer and updates members with the information.
|
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
|
||||||
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) {
|
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
|
||||||
fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos ,
|
NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
|
||||||
0 /* maxCodePointCount */, 0 /* outCodePoints */);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
|
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
|
|
||||||
mSiblingPos = NOT_A_DICT_POS;
|
|
||||||
mBigramLinkedNodePos = NOT_A_DICT_POS;
|
|
||||||
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
|
|
||||||
}
|
|
||||||
|
|
||||||
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
|
|
||||||
AK_FORCE_INLINE int getHeadPos() const {
|
|
||||||
return mHeadPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flags
|
|
||||||
AK_FORCE_INLINE bool isDeleted() const {
|
|
||||||
return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool hasChildren() const {
|
|
||||||
return mChildrenPos != NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isTerminal() const {
|
|
||||||
return PatriciaTrieReadingUtils::isTerminal(mFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isBlacklisted() const {
|
|
||||||
return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isNotAWord() const {
|
|
||||||
return PatriciaTrieReadingUtils::isNotAWord(mFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parent node position
|
|
||||||
AK_FORCE_INLINE int getParentPos() const {
|
|
||||||
return mParentPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Number of code points
|
|
||||||
AK_FORCE_INLINE uint8_t getCodePointCount() const {
|
|
||||||
return mCodePointCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Probability
|
|
||||||
AK_FORCE_INLINE int getProbabilityFieldPos() const {
|
|
||||||
return mProbabilityFieldPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getProbability() const {
|
|
||||||
return mProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Children PtNode array position
|
|
||||||
AK_FORCE_INLINE int getChildrenPosFieldPos() const {
|
|
||||||
return mChildrenPosFieldPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getChildrenPos() const {
|
|
||||||
return mChildrenPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bigram linked node position.
|
|
||||||
AK_FORCE_INLINE int getBigramLinkedNodePos() const {
|
|
||||||
return mBigramLinkedNodePos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Shortcutlist position
|
|
||||||
AK_FORCE_INLINE int getShortcutPos() const {
|
|
||||||
return mShortcutPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bigrams position
|
|
||||||
AK_FORCE_INLINE int getBigramsPos() const {
|
|
||||||
return mBigramPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sibling node position
|
|
||||||
AK_FORCE_INLINE int getSiblingNodePos() const {
|
|
||||||
return mSiblingPos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -141,23 +54,9 @@ class DynamicPatriciaTrieNodeReader {
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
||||||
int mHeadPos;
|
|
||||||
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
|
||||||
int mParentPos;
|
|
||||||
uint8_t mCodePointCount;
|
|
||||||
int mProbabilityFieldPos;
|
|
||||||
int mProbability;
|
|
||||||
int mChildrenPosFieldPos;
|
|
||||||
int mChildrenPos;
|
|
||||||
int mBigramLinkedNodePos;
|
|
||||||
int mShortcutPos;
|
|
||||||
int mBigramPos;
|
|
||||||
int mSiblingPos;
|
|
||||||
|
|
||||||
void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||||
const int maxCodePointCount, int *const outCodePoints);
|
const int siblingNodePos, const int bigramLinkedNodePos) const;
|
||||||
|
|
||||||
void invalidatePtNodeInfo();
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
|
||||||
|
|
|
@ -50,24 +50,27 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
|
||||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted();
|
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||||
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
|
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
|
||||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
||||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
||||||
// valid terminal DicNode.
|
// valid terminal DicNode.
|
||||||
isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY)
|
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
|
||||||
!= NOT_A_PROBABILITY;
|
!= NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(),
|
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
|
||||||
nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal,
|
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
|
||||||
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(),
|
ptNodeParams.hasChildren(),
|
||||||
nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints());
|
ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
|
||||||
readingHelper.readNextSiblingNode();
|
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||||
|
readingHelper.readNextSiblingNode(ptNodeParams);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,29 +80,33 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
// This method traverses parent nodes from the terminal by following parent pointers; thus,
|
||||||
// node code points are stored in the buffer in the reverse order.
|
// node code points are stored in the buffer in the reverse order.
|
||||||
int reverseCodePoints[maxCodePointCount];
|
int reverseCodePoints[maxCodePointCount];
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
|
||||||
// First, read the terminal node and get its probability.
|
// First, read the terminal node and get its probability.
|
||||||
readingHelper.initWithPtNodePos(ptNodePos);
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
if (!readingHelper.isValidTerminalNode()) {
|
|
||||||
|
const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
|
||||||
|
if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
|
||||||
// Node at the ptNodePos is not a valid terminal node.
|
// Node at the ptNodePos is not a valid terminal node.
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Store terminal node probability.
|
// Store terminal node probability.
|
||||||
*outUnigramProbability = readingHelper.getNodeReader()->getProbability();
|
*outUnigramProbability = terminalPtNodeParams.getProbability();
|
||||||
// Then, following parent node link to the dictionary root and fetch node code points.
|
// Then, following parent node link to the dictionary root and fetch node code points.
|
||||||
|
int totalCodePointCount = 0;
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) {
|
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
||||||
|
totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
|
||||||
|
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
|
||||||
// The ptNodePos is not a valid terminal node position in the dictionary.
|
// The ptNodePos is not a valid terminal node position in the dictionary.
|
||||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Store node code points to buffer in the reverse order.
|
// Store node code points to buffer in the reverse order.
|
||||||
readingHelper.fetchMergedNodeCodePointsInReverseOrder(
|
readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
|
||||||
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
|
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
|
||||||
// Follow parent node toward the root node.
|
// Follow parent node toward the root node.
|
||||||
readingHelper.readParentNode();
|
readingHelper.readParentNode(ptNodeParams);
|
||||||
}
|
}
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
// The node position or the dictionary is invalid.
|
// The node position or the dictionary is invalid.
|
||||||
|
@ -107,11 +114,10 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Reverse the stored code points to output them.
|
// Reverse the stored code points to output them.
|
||||||
const int codePointCount = readingHelper.getTotalCodePointCount();
|
for (int i = 0; i < totalCodePointCount; ++i) {
|
||||||
for (int i = 0; i < codePointCount; ++i) {
|
outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
|
||||||
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
|
|
||||||
}
|
}
|
||||||
return codePointCount;
|
return totalCodePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
|
@ -120,39 +126,42 @@ int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
|
||||||
if (readingHelper.getTotalCodePointCount() > length
|
if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
|
||||||
|| !readingHelper.isMatchedCodePoint(0 /* index */,
|
|| !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
||||||
searchCodePoints[matchedCodePointCount])) {
|
searchCodePoints[matchedCodePointCount])) {
|
||||||
// Current node has too many code points or its first code point is different from
|
// Current node has too many code points or its first code point is different from
|
||||||
// target code point. Skip this node and read the next sibling node.
|
// target code point. Skip this node and read the next sibling node.
|
||||||
readingHelper.readNextSiblingNode();
|
readingHelper.readNextSiblingNode(ptNodeParams);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check following merged node code points.
|
// Check following merged node code points.
|
||||||
const int nodeCodePointCount = nodeReader->getCodePointCount();
|
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
||||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||||
if (!readingHelper.isMatchedCodePoint(
|
if (!readingHelper.isMatchedCodePoint(ptNodeParams,
|
||||||
j, searchCodePoints[matchedCodePointCount + j])) {
|
j, searchCodePoints[matchedCodePointCount + j])) {
|
||||||
// Different code point is found. The given word is not included in the dictionary.
|
// Different code point is found. The given word is not included in the dictionary.
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (length == readingHelper.getTotalCodePointCount()) {
|
if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
|
||||||
// Terminal position is found.
|
// Terminal position is found.
|
||||||
return nodeReader->getHeadPos();
|
return ptNodeParams.getHeadPos();
|
||||||
}
|
}
|
||||||
if (!nodeReader->hasChildren()) {
|
if (!ptNodeParams.hasChildren()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
// Advance to the children nodes.
|
// Advance to the children nodes.
|
||||||
readingHelper.readChildNode();
|
readingHelper.readChildNode(ptNodeParams);
|
||||||
}
|
}
|
||||||
// If we already traversed the tree further than the word is long, there means
|
// If we already traversed the tree further than the word is long, there means
|
||||||
// there was no match (or we would have found it).
|
// there was no match (or we would have found it).
|
||||||
|
@ -179,39 +188,33 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
|
||||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
if (ptNodeParams.isDeleted()) {
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
|
||||||
if (nodeReader.isDeleted()) {
|
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
return nodeReader.getShortcutPos();
|
return ptNodeParams.getShortcutPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer,
|
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
if (ptNodeParams.isDeleted()) {
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
|
||||||
if (nodeReader.isDeleted()) {
|
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
return nodeReader.getBigramsPos();
|
return ptNodeParams.getBigramsPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
|
@ -225,8 +228,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
@ -44,6 +45,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
|
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||||
|
|
||||||
|
@ -114,6 +116,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
int mNeedsToDecayForTesting;
|
int mNeedsToDecayForTesting;
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -37,22 +39,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
while (!isEnd()) {
|
while (!isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (!alreadyVisitedChildren) {
|
if (!alreadyVisitedChildren) {
|
||||||
if (mNodeReader.hasChildren()) {
|
if (ptNodeParams.hasChildren()) {
|
||||||
// Move to the first child.
|
// Move to the first child.
|
||||||
if (!listener->onDescend(mNodeReader.getChildrenPos())) {
|
if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pushReadingStateToStack();
|
pushReadingStateToStack();
|
||||||
readChildNode();
|
readChildNode(ptNodeParams);
|
||||||
} else {
|
} else {
|
||||||
alreadyVisitedChildren = true;
|
alreadyVisitedChildren = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
|
if (!listener->onVisitingPtNode(&ptNodeParams)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
readNextSiblingNode();
|
readNextSiblingNode(ptNodeParams);
|
||||||
if (isEnd()) {
|
if (isEnd()) {
|
||||||
// All PtNodes in current linked PtNode arrays have been visited.
|
// All PtNodes in current linked PtNode arrays have been visited.
|
||||||
// Return to the parent.
|
// Return to the parent.
|
||||||
|
@ -101,10 +107,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
||||||
}
|
}
|
||||||
pushReadingStateToStack();
|
pushReadingStateToStack();
|
||||||
while (!isEnd()) {
|
while (!isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (alreadyVisitedAllPtNodesInArray) {
|
if (alreadyVisitedAllPtNodesInArray) {
|
||||||
if (alreadyVisitedChildren) {
|
if (alreadyVisitedChildren) {
|
||||||
// Move to next sibling PtNode's children.
|
// Move to next sibling PtNode's children.
|
||||||
readNextSiblingNode();
|
readNextSiblingNode(ptNodeParams);
|
||||||
if (isEnd()) {
|
if (isEnd()) {
|
||||||
// Return to the parent PTNode.
|
// Return to the parent PTNode.
|
||||||
if (!listener->onAscend()) {
|
if (!listener->onAscend()) {
|
||||||
|
@ -120,13 +130,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
||||||
alreadyVisitedChildren = false;
|
alreadyVisitedChildren = false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (mNodeReader.hasChildren()) {
|
if (ptNodeParams.hasChildren()) {
|
||||||
// Move to the first child.
|
// Move to the first child.
|
||||||
if (!listener->onDescend(mNodeReader.getChildrenPos())) {
|
if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pushReadingStateToStack();
|
pushReadingStateToStack();
|
||||||
readChildNode();
|
readChildNode(ptNodeParams);
|
||||||
// Push state to return the head of PtNode array.
|
// Push state to return the head of PtNode array.
|
||||||
pushReadingStateToStack();
|
pushReadingStateToStack();
|
||||||
alreadyVisitedAllPtNodesInArray = false;
|
alreadyVisitedAllPtNodesInArray = false;
|
||||||
|
@ -136,10 +146,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) {
|
if (!listener->onVisitingPtNode(&ptNodeParams)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
readNextSiblingNode();
|
readNextSiblingNode(ptNodeParams);
|
||||||
if (isEnd()) {
|
if (isEnd()) {
|
||||||
if (!listener->onReadingPtNodeArrayTail()) {
|
if (!listener->onReadingPtNodeArrayTail()) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -21,9 +21,8 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
|
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
|
||||||
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
|
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
|
||||||
*/
|
*/
|
||||||
|
// TODO: Move to pt_common.
|
||||||
class DynamicPatriciaTrieReadingHelper {
|
class DynamicPatriciaTrieReadingHelper {
|
||||||
public:
|
public:
|
||||||
class TraversingEventListener {
|
class TraversingEventListener {
|
||||||
|
@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
virtual bool onReadingPtNodeArrayTail() = 0;
|
virtual bool onReadingPtNodeArrayTail() = 0;
|
||||||
|
|
||||||
// Returns whether the event handling was succeeded or not.
|
// Returns whether the event handling was succeeded or not.
|
||||||
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
|
||||||
const int *const nodeCodePoints) = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
TraversingEventListener() {};
|
TraversingEventListener() {};
|
||||||
|
@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
};
|
};
|
||||||
|
|
||||||
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
const PtNodeReader *const ptNodeReader)
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
|
||||||
: mIsError(false), mReadingState(), mBuffer(buffer),
|
: mIsError(false), mReadingState(), mBuffer(buffer),
|
||||||
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {}
|
mPtNodeReader(ptNodeReader), mReadingStateStack() {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieReadingHelper() {}
|
~DynamicPatriciaTrieReadingHelper() {}
|
||||||
|
|
||||||
|
@ -90,9 +88,6 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
nextPtNodeArray();
|
nextPtNodeArray();
|
||||||
if (!isEnd()) {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,20 +105,23 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
mReadingStateStack.clear();
|
mReadingStateStack.clear();
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const {
|
AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
|
||||||
return &mNodeReader;
|
if (isEnd()) {
|
||||||
|
return PtNodeParams();
|
||||||
|
}
|
||||||
|
return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isValidTerminalNode() const {
|
AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
|
||||||
return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal();
|
return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const {
|
AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
|
||||||
return mMergedNodeCodePoints[index] == codePoint;
|
const int codePoint) const {
|
||||||
|
return ptNodeParams.getCodePoints()[index] == codePoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count exclude the last read node's code points.
|
// Return code point count exclude the last read node's code points.
|
||||||
|
@ -132,68 +130,56 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count include the last read node's code points.
|
// Return code point count include the last read node's code points.
|
||||||
AK_FORCE_INLINE int getTotalCodePointCount() const {
|
AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
|
||||||
return mReadingState.mTotalCodePointCountSinceInitialization
|
return mReadingState.mTotalCodePointCountSinceInitialization
|
||||||
+ mNodeReader.getCodePointCount();
|
+ ptNodeParams.getCodePointCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(
|
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
|
||||||
const int index, int *const outCodePoints) const {
|
const int index, int *const outCodePoints) const {
|
||||||
const int nodeCodePointCount = mNodeReader.getCodePointCount();
|
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
||||||
|
const int *const nodeCodePoints = ptNodeParams.getCodePoints();
|
||||||
for (int i = 0; i < nodeCodePointCount; ++i) {
|
for (int i = 0; i < nodeCodePointCount; ++i) {
|
||||||
outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i];
|
outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const int *getMergedNodeCodePoints() const {
|
AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
|
||||||
return mMergedNodeCodePoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE void readNextSiblingNode() {
|
|
||||||
mReadingState.mRemainingPtNodeCountInThisArray -= 1;
|
mReadingState.mRemainingPtNodeCountInThisArray -= 1;
|
||||||
mReadingState.mPos = mNodeReader.getSiblingNodePos();
|
mReadingState.mPos = ptNodeParams.getSiblingNodePos();
|
||||||
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
|
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
|
||||||
// All nodes in the current node array have been read.
|
// All nodes in the current node array have been read.
|
||||||
followForwardLink();
|
followForwardLink();
|
||||||
if (!isEnd()) {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the first child node of the current node.
|
// Read the first child node of the current node.
|
||||||
AK_FORCE_INLINE void readChildNode() {
|
AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
|
||||||
if (mNodeReader.hasChildren()) {
|
if (ptNodeParams.hasChildren()) {
|
||||||
mReadingState.mTotalCodePointCountSinceInitialization +=
|
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||||
mNodeReader.getCodePointCount();
|
ptNodeParams.getCodePointCount();
|
||||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
|
||||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
|
||||||
mReadingState.mPos = mNodeReader.getChildrenPos();
|
mReadingState.mPos = ptNodeParams.getChildrenPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
// Read children node array.
|
// Read children node array.
|
||||||
nextPtNodeArray();
|
nextPtNodeArray();
|
||||||
if (!isEnd()) {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read the parent node of the current node.
|
// Read the parent node of the current node.
|
||||||
AK_FORCE_INLINE void readParentNode() {
|
AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
|
||||||
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) {
|
if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
|
||||||
mReadingState.mTotalCodePointCountSinceInitialization +=
|
mReadingState.mTotalCodePointCountSinceInitialization +=
|
||||||
mNodeReader.getCodePointCount();
|
ptNodeParams.getCodePointCount();
|
||||||
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
|
||||||
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
|
||||||
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
mReadingState.mRemainingPtNodeCountInThisArray = 1;
|
||||||
mReadingState.mPos = mNodeReader.getParentPos();
|
mReadingState.mPos = ptNodeParams.getParentPos();
|
||||||
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
|
||||||
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
|
||||||
fetchPtNodeInfo();
|
|
||||||
} else {
|
} else {
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
mReadingState.mPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
|
@ -207,12 +193,6 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
return mReadingState.mPosOfThisPtNodeArrayHead;
|
return mReadingState.mPosOfThisPtNodeArrayHead;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
|
|
||||||
if (!isEnd()) {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
|
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
|
||||||
|
|
||||||
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||||
|
@ -253,24 +233,13 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
bool mIsError;
|
bool mIsError;
|
||||||
PtNodeReadingState mReadingState;
|
PtNodeReadingState mReadingState;
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
const BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
const PtNodeReader *const mPtNodeReader;
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
|
||||||
std::vector<PtNodeReadingState> mReadingStateStack;
|
std::vector<PtNodeReadingState> mReadingStateStack;
|
||||||
|
|
||||||
void nextPtNodeArray();
|
void nextPtNodeArray();
|
||||||
|
|
||||||
void followForwardLink();
|
void followForwardLink();
|
||||||
|
|
||||||
AK_FORCE_INLINE void fetchPtNodeInfo() {
|
|
||||||
mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
|
|
||||||
MAX_WORD_LENGTH, mMergedNodeCodePoints);
|
|
||||||
if (mNodeReader.getCodePointCount() <= 0) {
|
|
||||||
// Empty node is not allowed.
|
|
||||||
mIsError = true;
|
|
||||||
mReadingState.mPos = NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE void pushReadingStateToStack() {
|
AK_FORCE_INLINE void pushReadingStateToStack() {
|
||||||
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
|
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
|
||||||
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
|
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
|
||||||
|
@ -288,9 +257,6 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
} else {
|
} else {
|
||||||
mReadingState = mReadingStateStack.back();
|
mReadingState = mReadingStateStack.back();
|
||||||
mReadingStateStack.pop_back();
|
mReadingStateStack.pop_back();
|
||||||
if (!isEnd()) {
|
|
||||||
fetchPtNodeInfo();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -41,24 +41,26 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
bool *const outAddedNewUnigram) {
|
bool *const outAddedNewUnigram) {
|
||||||
int parentPos = NOT_A_DICT_POS;
|
int parentPos = NOT_A_DICT_POS;
|
||||||
while (!readingHelper->isEnd()) {
|
while (!readingHelper->isEnd()) {
|
||||||
|
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||||
|
if (!ptNodeParams.isValid()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
||||||
if (!readingHelper->isMatchedCodePoint(0 /* index */,
|
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
||||||
wordCodePoints[matchedCodePointCount])) {
|
wordCodePoints[matchedCodePointCount])) {
|
||||||
// The first code point is different from target code point. Skip this node and read
|
// The first code point is different from target code point. Skip this node and read
|
||||||
// the next sibling node.
|
// the next sibling node.
|
||||||
readingHelper->readNextSiblingNode();
|
readingHelper->readNextSiblingNode(ptNodeParams);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check following merged node code points.
|
// Check following merged node code points.
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
|
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
||||||
const int nodeCodePointCount = nodeReader->getCodePointCount();
|
|
||||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
for (int j = 1; j < nodeCodePointCount; ++j) {
|
||||||
const int nextIndex = matchedCodePointCount + j;
|
const int nextIndex = matchedCodePointCount + j;
|
||||||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
|
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return reallocatePtNodeAndAddNewPtNodes(nodeReader,
|
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
|
||||||
readingHelper->getMergedNodeCodePoints(), j,
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
||||||
probability),
|
probability),
|
||||||
wordCodePoints + matchedCodePointCount,
|
wordCodePoints + matchedCodePointCount,
|
||||||
|
@ -66,20 +68,19 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount()) {
|
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||||
return setPtNodeProbability(nodeReader, probability,
|
return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
|
||||||
readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
|
|
||||||
}
|
}
|
||||||
if (!nodeReader->hasChildren()) {
|
if (!ptNodeParams.hasChildren()) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
|
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(),
|
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
||||||
codePointCount - readingHelper->getTotalCodePointCount());
|
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
||||||
}
|
}
|
||||||
// Advance to the children nodes.
|
// Advance to the children nodes.
|
||||||
parentPos = nodeReader->getHeadPos();
|
parentPos = ptNodeParams.getHeadPos();
|
||||||
readingHelper->readChildNode();
|
readingHelper->readChildNode(ptNodeParams);
|
||||||
}
|
}
|
||||||
if (readingHelper->isError()) {
|
if (readingHelper->isError()) {
|
||||||
// The dictionary is invalid.
|
// The dictionary is invalid.
|
||||||
|
@ -95,26 +96,24 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
const int probability, bool *const outAddedNewBigram) {
|
const int probability, bool *const outAddedNewBigram) {
|
||||||
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
|
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
mMergedNodeCodePoints);
|
|
||||||
// Move node to add bigram entry.
|
// Move node to add bigram entry.
|
||||||
const int newNodePos = mBuffer->getTailPosition();
|
const int newNodePos = mBuffer->getTailPosition();
|
||||||
if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
|
if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int writingPos = newNodePos;
|
int writingPos = newNodePos;
|
||||||
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(),
|
||||||
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
|
ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
|
||||||
&writingPos)) {
|
ptNodeParams.getProbability(), &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
|
const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos));
|
||||||
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
|
if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
|
||||||
// Insert a new bigram entry into the existing bigram list.
|
// Insert a new bigram entry into the existing bigram list.
|
||||||
int bigramListPos = nodeReader.getBigramsPos();
|
int bigramListPos = newPtNodeParams.getBigramsPos();
|
||||||
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
|
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
|
||||||
outAddedNewBigram);
|
outAddedNewBigram);
|
||||||
} else {
|
} else {
|
||||||
|
@ -126,10 +125,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
}
|
}
|
||||||
// Then, Mark as the PtNode having bigram list in the flags.
|
// Then, Mark as the PtNode having bigram list in the flags.
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
|
PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
|
||||||
nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
|
newPtNodeParams.isNotAWord(),
|
||||||
nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
|
||||||
nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
||||||
|
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
writingPos = newNodePos;
|
writingPos = newNodePos;
|
||||||
// Write updated flags into the moved PtNode's flags field.
|
// Write updated flags into the moved PtNode's flags field.
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
@ -140,11 +140,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
|
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
|
return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||||
|
@ -181,8 +181,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
|
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||||
int pos = nodeToUpdate->getHeadPos();
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
|
@ -194,16 +194,16 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||||
true /* isDeleted */);
|
true /* isDeleted */);
|
||||||
int writingPos = nodeToUpdate->getHeadPos();
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
// Update flags.
|
// Update flags.
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
&writingPos);
|
&writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
|
const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos,
|
||||||
const int bigramLinkedNodePos) {
|
const int bigramLinkedNodePos) {
|
||||||
int pos = originalNode->getHeadPos();
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
|
@ -215,7 +215,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||||
false /* isDeleted */);
|
false /* isDeleted */);
|
||||||
int writingPos = originalNode->getHeadPos();
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
// Update flags.
|
// Update flags.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
@ -223,31 +223,32 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
}
|
}
|
||||||
// Update moved position, which is stored in the parent offset field.
|
// Update moved position, which is stored in the parent offset field.
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
|
mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Update bigram linked node position, which is stored in the children position field.
|
// Update bigram linked node position, which is stored in the children position field.
|
||||||
int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
|
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (originalNode->hasChildren()) {
|
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
||||||
// Update children's parent position.
|
// Update children's parent position.
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
|
readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
||||||
while (!readingHelper.isEnd()) {
|
while (!readingHelper.isEnd()) {
|
||||||
int parentOffsetFieldPos = nodeReader->getHeadPos()
|
const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams());
|
||||||
|
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
||||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
|
mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
||||||
&parentOffsetFieldPos)) {
|
&parentOffsetFieldPos)) {
|
||||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||||
// we give up to update dictionary.
|
// we give up to update dictionary.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
readingHelper.readNextSiblingNode();
|
readingHelper.readNextSiblingNode(childPtNodeParams);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -333,13 +334,13 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
|
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
|
||||||
BufferWithExtendableBuffer *const bufferToWrite,
|
BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
|
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
||||||
const int *const codePoints, const int codePointCount, const int probability,
|
const int *const codePoints, const int codePointCount, const int probability,
|
||||||
int *const writingPos) {
|
int *const writingPos) {
|
||||||
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
|
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(),
|
||||||
originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
|
originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability,
|
||||||
originalNode->getChildrenPos(), originalNode->getBigramsPos(),
|
originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(),
|
||||||
originalNode->getShortcutPos(), writingPos);
|
originalPtNodeParams->getShortcutPos(), writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
|
@ -355,14 +356,14 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
|
const PtNodeParams *const originalPtNodeParams, const int probability,
|
||||||
const int *const codePoints, bool *const outAddedNewUnigram) {
|
bool *const outAddedNewUnigram) {
|
||||||
if (originalPtNode->isTerminal()) {
|
if (originalPtNodeParams->isTerminal()) {
|
||||||
// Overwrites the probability.
|
// Overwrites the probability.
|
||||||
*outAddedNewUnigram = false;
|
*outAddedNewUnigram = false;
|
||||||
const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
|
const int probabilityToWrite = getUpdatedProbability(
|
||||||
probability);
|
originalPtNodeParams->getProbability(), probability);
|
||||||
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
|
int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
probabilityToWrite, &probabilityFieldPos)) {
|
probabilityToWrite, &probabilityFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -371,11 +372,12 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
// Make the node terminal and write the probability.
|
// Make the node terminal and write the probability.
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
int movedPos = mBuffer->getTailPosition();
|
int movedPos = mBuffer->getTailPosition();
|
||||||
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
|
if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams,
|
||||||
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
|
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(),
|
||||||
|
originalPtNodeParams->getCodePointCount(),
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||||
&movedPos)) {
|
&movedPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -385,15 +387,15 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
|
const PtNodeParams *const parentPtNodeParams, const int probability,
|
||||||
const int *const codePoints, const int codePointCount) {
|
const int *const codePoints, const int codePointCount) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
|
int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
newPtNodeArrayPos, &childrenPosFieldPos)) {
|
newPtNodeArrayPos, &childrenPosFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
codePointCount, probability);
|
codePointCount, probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,8 +420,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
|
|
||||||
// Returns whether the dictionary update succeeded or not.
|
// Returns whether the dictionary update succeeded or not.
|
||||||
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
|
|
||||||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||||
const int newNodeCodePointCount) {
|
const int newNodeCodePointCount) {
|
||||||
// When addsExtraChild is true, split the reallocating PtNode and add a new child.
|
// When addsExtraChild is true, split the reallocating PtNode and add a new child.
|
||||||
|
@ -435,8 +436,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
// with the actual children position.
|
// with the actual children position.
|
||||||
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
||||||
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
|
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(),
|
||||||
reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
|
reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -449,11 +450,11 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
// Write the 2nd part of the reallocating node.
|
// Write the 2nd part of the reallocating node.
|
||||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams,
|
||||||
firstPartOfReallocatedPtNodePos,
|
firstPartOfReallocatedPtNodePos,
|
||||||
reallocatingPtNodeCodePoints + overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||||
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||||
reallocatingPtNode->getProbability(), &writingPos)) {
|
reallocatingPtNodeParams->getProbability(), &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
|
@ -469,15 +470,16 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Update original reallocating PtNode as moved.
|
// Update original reallocating PtNode as moved.
|
||||||
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
|
if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
|
||||||
secondPartOfReallocatedPtNodePos)) {
|
secondPartOfReallocatedPtNodePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Load node info. Information of the 1st part will be fetched.
|
// Load node info. Information of the 1st part will be fetched.
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
|
const PtNodeParams ptNodeParams(
|
||||||
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
|
||||||
// Update children position.
|
// Update children position.
|
||||||
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
|
int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
actualChildrenPos, &childrenPosFieldPos)) {
|
actualChildrenPos, &childrenPosFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -488,7 +490,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
int *const outUnigramCount, int *const outBigramCount) {
|
int *const outUnigramCount, int *const outBigramCount) {
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
|
@ -530,9 +533,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||||
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
|
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
|
||||||
mNeedsToDecay);
|
mNeedsToDecay);
|
||||||
// Create reading helper for the GCed dictionary.
|
// Create reading node reader and reading helper for the GCed dictionary.
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
|
||||||
&newDictShortcutPolicy);
|
&newDictShortcutPolicy);
|
||||||
|
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
||||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||||
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
|
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
|
||||||
|
|
|
@ -26,11 +26,12 @@ namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
class DynamicBigramListPolicy;
|
class DynamicBigramListPolicy;
|
||||||
class DynamicPatriciaTrieNodeReader;
|
|
||||||
class DynamicPatriciaTrieReadingHelper;
|
class DynamicPatriciaTrieReadingHelper;
|
||||||
class DynamicShortcutListPolicy;
|
class DynamicShortcutListPolicy;
|
||||||
class HeaderPolicy;
|
class HeaderPolicy;
|
||||||
|
class PtNodeParams;
|
||||||
|
|
||||||
|
// TODO: Make it independent from a particular format and move to pt_common.
|
||||||
class DynamicPatriciaTrieWritingHelper {
|
class DynamicPatriciaTrieWritingHelper {
|
||||||
public:
|
public:
|
||||||
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
|
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
|
||||||
|
@ -77,12 +78,12 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
// CAVEAT: This method must be called only from inner classes of
|
// CAVEAT: This method must be called only from inner classes of
|
||||||
// DynamicPatriciaTrieGcEventListeners.
|
// DynamicPatriciaTrieGcEventListeners.
|
||||||
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
|
bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||||
|
|
||||||
// CAVEAT: This method must be called only from this class or inner classes of
|
// CAVEAT: This method must be called only from this class or inner classes of
|
||||||
// DynamicPatriciaTrieGcEventListeners.
|
// DynamicPatriciaTrieGcEventListeners.
|
||||||
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
|
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
|
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
||||||
const int *const codePoints, const int codePointCount, const int probability,
|
const int *const codePoints, const int codePointCount, const int probability,
|
||||||
int *const writingPos);
|
int *const writingPos);
|
||||||
|
|
||||||
|
@ -96,7 +97,7 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
const bool mNeedsToDecay;
|
const bool mNeedsToDecay;
|
||||||
|
|
||||||
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
const int movedPos, const int bigramLinkedNodePos);
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
|
||||||
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
|
@ -112,19 +113,17 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
|
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
|
||||||
const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
bool createChildrenPtNodeArrayAndAChildPtNode(
|
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||||
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
|
const int probability, const int *const codePoints, const int codePointCount);
|
||||||
const int *const codePoints, const int codePointCount);
|
|
||||||
|
|
||||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability);
|
const int nodeCodePointCount, const int probability);
|
||||||
|
|
||||||
bool reallocatePtNodeAndAddNewPtNodes(
|
bool reallocatePtNodeAndAddNewPtNodes(
|
||||||
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
|
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
||||||
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
|
|
||||||
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
|
||||||
const int newNodeCodePointCount);
|
const int newNodeCodePointCount);
|
||||||
|
|
||||||
|
|
|
@ -30,4 +30,6 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
|
||||||
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
||||||
".shortcut_index_shortcut";
|
".shortcut_index_shortcut";
|
||||||
|
|
||||||
|
const int Ver4DictConstants::NOT_A_TERMINAL = -1;
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -34,6 +34,8 @@ class Ver4DictConstants {
|
||||||
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
||||||
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
||||||
|
|
||||||
|
static const int NOT_A_TERMINAL;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue