Refactoring: Introduce PtNodeParams and PtNodeReader.

This allows the helpers to handle multiple dictionary formats.

Bug: 11073222

Change-Id: Iaef7be08534f9010e837ffcf8c8292b174b64d2b
This commit is contained in:
Keisuke Kuroyanagi 2013-10-25 11:18:55 -07:00
parent 75d7f0fbf5
commit c481d0556f
15 changed files with 528 additions and 436 deletions

View file

@ -157,8 +157,9 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
} }
const int bigramTargetNodePos = const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
if (nodeReader.isDeleted() || !nodeReader.isTerminal() bigramTargetNodePos));
if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) { || bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry. // The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
@ -342,20 +343,22 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
if (originalBigramPos == NOT_A_DICT_POS) { if (originalBigramPos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
int currentPos = originalBigramPos;
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); int currentPos = NOT_A_DICT_POS;
int bigramLinkCount = 0; int bigramLinkCount = 0;
while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { int bigramLinkedNodePos = originalBigramPos;
currentPos = nodeReader.getBigramLinkedNodePos(); do {
nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); currentPos = bigramLinkedNodePos;
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
bigramLinkCount++; bigramLinkCount++;
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
ASSERT(false); ASSERT(false);
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
} bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
} while (bigramLinkedNodePos != NOT_A_DICT_POS);
return currentPos; return currentPos;
} }

View file

@ -0,0 +1,185 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PT_NODE_PARAMS_H
#define LATINIME_PT_NODE_PARAMS_H
#include <cstring>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
namespace latinime {
// Holds the information read for a single PtNode. Instances are immutable: every field is set
// by a constructor and is only exposed through const accessors.
class PtNodeParams {
 public:
    // Constructs an invalid PtNode: it has no code points, every position is NOT_A_DICT_POS,
    // the terminal id is NOT_A_TERMINAL and the probability is NOT_A_PROBABILITY.
    PtNodeParams()
            : mHeadPos(NOT_A_DICT_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
              mCodePointCount(0), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
              mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
              mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
              mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
              mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}

    // Copy constructor. Public so that a PtNodeParams can be used as a return value.
    PtNodeParams(const PtNodeParams& ptNodeParams)
            : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
              mParentPos(ptNodeParams.mParentPos),
              mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
              mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
              mTerminalId(ptNodeParams.mTerminalId),
              mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
              mProbability(ptNodeParams.mProbability),
              mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
              mChildrenPos(ptNodeParams.mChildrenPos),
              mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
              mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
              mSiblingPos(ptNodeParams.mSiblingPos) {
        // The source count never exceeds MAX_WORD_LENGTH because every constructor of this
        // class stores a sanitized count.
        if (mCodePointCount > 0) {
            memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
        }
    }

    // Constructs a PtNode from explicitly given values. This constructor takes no terminal id
    // information: the terminal id is set to NOT_A_TERMINAL and its field position to
    // NOT_A_DICT_POS.
    //
    // codePoints must point to at least codePointCount readable ints. If codePoints is null or
    // codePointCount is outside [0, MAX_WORD_LENGTH], no code points are copied and the stored
    // count is 0, so isValid() returns false instead of the internal array being overrun.
    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
            const int parentPos, const int codePointCount, const int *const codePoints,
            const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
            const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
            const int bigramPos, const int siblingPos)
            : mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
              mCodePointCount(getSanitizedCodePointCount(codePointCount, codePoints)),
              mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL),
              mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
              mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
              mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
              mBigramPos(bigramPos), mSiblingPos(siblingPos) {
        // Skip the copy entirely for an empty node so that memcpy is never handed a null
        // source pointer.
        if (mCodePointCount > 0) {
            memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
        }
    }

    // A PtNode is considered valid when it has at least one code point.
    AK_FORCE_INLINE bool isValid() const {
        return mCodePointCount > 0;
    }

    // Head position of the PtNode
    AK_FORCE_INLINE int getHeadPos() const {
        return mHeadPos;
    }

    // Flags
    AK_FORCE_INLINE bool isDeleted() const {
        return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
    }

    // Derived from the children position rather than from the flags.
    AK_FORCE_INLINE bool hasChildren() const {
        return mChildrenPos != NOT_A_DICT_POS;
    }

    AK_FORCE_INLINE bool isTerminal() const {
        return PatriciaTrieReadingUtils::isTerminal(mFlags);
    }

    AK_FORCE_INLINE bool isBlacklisted() const {
        return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
    }

    AK_FORCE_INLINE bool isNotAWord() const {
        return PatriciaTrieReadingUtils::isNotAWord(mFlags);
    }

    // Parent node position
    AK_FORCE_INLINE int getParentPos() const {
        return mParentPos;
    }

    // Number of code points
    AK_FORCE_INLINE uint8_t getCodePointCount() const {
        return mCodePointCount;
    }

    // Code points of this PtNode. Only the first getCodePointCount() entries are meaningful.
    AK_FORCE_INLINE const int *getCodePoints() const {
        return mCodePoints;
    }

    // Terminal id
    AK_FORCE_INLINE int getTerminalIdFieldPos() const {
        return mTerminalIdFieldPos;
    }

    AK_FORCE_INLINE int getTerminalId() const {
        return mTerminalId;
    }

    // Probability
    AK_FORCE_INLINE int getProbabilityFieldPos() const {
        return mProbabilityFieldPos;
    }

    AK_FORCE_INLINE int getProbability() const {
        return mProbability;
    }

    // Children PtNode array position
    AK_FORCE_INLINE int getChildrenPosFieldPos() const {
        return mChildrenPosFieldPos;
    }

    AK_FORCE_INLINE int getChildrenPos() const {
        return mChildrenPos;
    }

    // Bigram linked node position.
    AK_FORCE_INLINE int getBigramLinkedNodePos() const {
        return mBigramLinkedNodePos;
    }

    // Shortcutlist position
    AK_FORCE_INLINE int getShortcutPos() const {
        return mShortcutPos;
    }

    // Bigrams position
    AK_FORCE_INLINE int getBigramsPos() const {
        return mBigramPos;
    }

    // Sibling node position
    AK_FORCE_INLINE int getSiblingNodePos() const {
        return mSiblingPos;
    }

 private:
    // This class has a public copy constructor so that it can be used as a return value.
    // The assignment operator is disallowed: it is declared here and never defined.
    PtNodeParams &operator=(PtNodeParams &ptNodeParams);

    // Returns the number of code points that can safely be copied into mCodePoints.
    // A null source or a count outside [0, MAX_WORD_LENGTH] yields 0. Without this check a
    // negative count would wrap around when narrowed to uint8_t and an oversized count would
    // make memcpy write past the end of mCodePoints.
    static AK_FORCE_INLINE uint8_t getSanitizedCodePointCount(const int codePointCount,
            const int *const codePoints) {
        if (!codePoints || codePointCount < 0 || codePointCount > MAX_WORD_LENGTH) {
            return 0;
        }
        return static_cast<uint8_t>(codePointCount);
    }

    const int mHeadPos;
    const PatriciaTrieReadingUtils::NodeFlags mFlags;
    const int mParentPos;
    const uint8_t mCodePointCount;
    // Written only by the constructors; exposed read-only through getCodePoints().
    int mCodePoints[MAX_WORD_LENGTH];
    const int mTerminalIdFieldPos;
    const int mTerminalId;
    const int mProbabilityFieldPos;
    const int mProbability;
    const int mChildrenPosFieldPos;
    const int mChildrenPos;
    const int mBigramLinkedNodePos;
    const int mShortcutPos;
    const int mBigramPos;
    const int mSiblingPos;
};
} // namespace latinime
#endif /* LATINIME_PT_NODE_PARAMS_H */

View file

@ -0,0 +1,39 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_PT_NODE_READER_H
#define LATINIME_PT_NODE_READER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
namespace latinime {
// Abstract interface for reading PtNode information. Concrete readers derive from this class
// and implement fetchNodeInfoInBufferFromPtNodePos().
class PtNodeReader {
 public:
    virtual ~PtNodeReader() {
    }

    // Reads the PtNode identified by ptNodePos and returns its information by value.
    virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(
            const int ptNodePos) const = 0;

 protected:
    // Protected so that only derived classes can construct a reader.
    PtNodeReader() {
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(PtNodeReader);
};
} // namespace latinime
#endif /* LATINIME_PT_NODE_READER_H */

View file

@ -17,22 +17,22 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
bool DynamicPatriciaTrieGcEventListeners bool DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
const int *const nodeCodePoints) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children. // children.
bool isUselessPtNode = !node->isTerminal(); bool isUselessPtNode = !ptNodeParams->isTerminal();
if (node->isTerminal() && mIsDecayingDict) { if (ptNodeParams->isTerminal() && mIsDecayingDict) {
const int newProbability = const int newProbability =
ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(), ForgettingCurveUtils::getEncodedProbabilityToSave(ptNodeParams->getProbability(),
mHeaderPolicy); mHeaderPolicy);
int writingPos = node->getProbabilityFieldPos(); int writingPos = ptNodeParams->getProbabilityFieldPos();
// Update probability. // Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) { mBuffer, newProbability, &writingPos)) {
@ -44,9 +44,9 @@ bool DynamicPatriciaTrieGcEventListeners
} }
if (mChildrenValue > 0) { if (mChildrenValue > 0) {
isUselessPtNode = false; isUselessPtNode = false;
} else if (node->isTerminal()) { } else if (ptNodeParams->isTerminal()) {
// Remove children as all children are useless. // Remove children as all children are useless.
int writingPos = node->getChildrenPosFieldPos(); int writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) { mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
return false; return false;
@ -54,12 +54,12 @@ bool DynamicPatriciaTrieGcEventListeners
} }
if (isUselessPtNode) { if (isUselessPtNode) {
// Current PtNode is no longer needed. Mark it as deleted. // Current PtNode is no longer needed. Mark it as deleted.
if (!mWritingHelper->markNodeAsDeleted(node)) { if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) {
return false; return false;
} }
} else { } else {
mValueStack.back() += 1; mValueStack.back() += 1;
if (node->isTerminal()) { if (ptNodeParams->isTerminal()) {
mValidUnigramCount += 1; mValidUnigramCount += 1;
} }
} }
@ -67,10 +67,9 @@ bool DynamicPatriciaTrieGcEventListeners
} }
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
const int *const nodeCodePoints) { if (!ptNodeParams->isDeleted()) {
if (!node->isDeleted()) { int pos = ptNodeParams->getBigramsPos();
int pos = node->getBigramsPos();
if (pos != NOT_A_DICT_POS) { if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0; int bigramEntryCount = 0;
if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos, if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
@ -117,31 +116,29 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
// Write valid PtNode to buffer and memorize mapping from the old position to the new position. // Write valid PtNode to buffer and memorize mapping from the old position to the new position.
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
const int *const nodeCodePoints) { if (ptNodeParams->isDeleted()) {
if (node->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted. // Current PtNode is not written in new buffer because it has been deleted.
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
node->getHeadPos(), NOT_A_DICT_POS)); ptNodeParams->getHeadPos(), NOT_A_DICT_POS));
return true; return true;
} }
int writingPos = mBufferToWrite->getTailPosition(); int writingPos = mBufferToWrite->getTailPosition();
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
node->getHeadPos(), writingPos)); ptNodeParams->getHeadPos(), writingPos));
mValidPtNodeCount++; mValidPtNodeCount++;
// Writes current PtNode. // Writes current PtNode.
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams,
node->getParentPos(), nodeCodePoints, node->getCodePointCount(), ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(),
node->getProbability(), &writingPos); ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos);
} }
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
const int *const nodeCodePoints) {
// Updates parent position. // Updates parent position.
int parentPos = node->getParentPos(); int parentPos = ptNodeParams->getParentPos();
if (parentPos != NOT_A_DICT_POS) { if (parentPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
@ -149,15 +146,16 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
parentPos = it->second; parentPos = it->second;
} }
} }
int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; int writingPos = ptNodeParams->getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
// Write updated parent offset. // Write updated parent offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
parentPos, node->getHeadPos(), &writingPos)) { parentPos, ptNodeParams->getHeadPos(), &writingPos)) {
return false; return false;
} }
// Updates children position. // Updates children position.
int childrenPos = node->getChildrenPos(); int childrenPos = ptNodeParams->getChildrenPos();
if (childrenPos != NOT_A_DICT_POS) { if (childrenPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
@ -165,14 +163,14 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
childrenPos = it->second; childrenPos = it->second;
} }
} }
writingPos = node->getChildrenPosFieldPos(); writingPos = ptNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
childrenPos, &writingPos)) { childrenPos, &writingPos)) {
return false; return false;
} }
// Updates bigram target PtNode positions in the bigram list. // Updates bigram target PtNode positions in the bigram list.
int bigramsPos = node->getBigramsPos(); int bigramsPos = ptNodeParams->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) { if (bigramsPos != NOT_A_DICT_POS) {
int bigramEntryCount; int bigramEntryCount;
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
@ -181,7 +179,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField
} }
mBigramCount += bigramEntryCount; mBigramCount += bigramEntryCount;
} }
if (node->isTerminal()) { if (ptNodeParams->isTerminal()) {
mUnigramCount++; mUnigramCount++;
} }

View file

@ -30,6 +30,7 @@
namespace latinime { namespace latinime {
class DictionaryHeaderStructurePolicy; class DictionaryHeaderStructurePolicy;
class PtNodeParams;
class DynamicPatriciaTrieGcEventListeners { class DynamicPatriciaTrieGcEventListeners {
public: public:
@ -66,8 +67,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; } bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
const int *const nodeCodePoints);
int getValidUnigramCount() const { int getValidUnigramCount() const {
return mValidUnigramCount; return mValidUnigramCount;
@ -101,8 +101,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; } bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
const int *const nodeCodePoints);
int getValidBigramEntryCount() const { int getValidBigramEntryCount() const {
return mValidBigramEntryCount; return mValidBigramEntryCount;
@ -133,8 +132,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail(); bool onReadingPtNodeArrayTail();
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
const int *const nodeCodePoints);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
@ -167,8 +165,7 @@ class DynamicPatriciaTrieGcEventListeners {
bool onReadingPtNodeArrayTail() { return true; } bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
const int *const nodeCodePoints);
int getUnigramCount() const { int getUnigramCount() const {
return mUnigramCount; return mUnigramCount;

View file

@ -18,107 +18,90 @@
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime { namespace latinime {
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) { const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary. // Reading invalid position because of bug or broken dictionary.
AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
ptNodePos, mBuffer->getTailPosition()); ptNodePos, mBuffer->getTailPosition());
ASSERT(false); ASSERT(false);
invalidatePtNodeInfo(); return PtNodeParams();
return;
} }
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = ptNodePos; int pos = ptNodePos;
mHeadPos = ptNodePos; const int headPos = ptNodePos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize(); pos -= mBuffer->getOriginalBufferSize();
} }
mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPosOffset = const int parentPosOffset =
DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
&pos); &pos);
mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); const int parentPos =
if (outCodePoints != 0) { DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( int codePoints[MAX_WORD_LENGTH];
dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
} else { dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( int probability = NOT_A_PROBABILITY;
dictBuf, mFlags, MAX_WORD_LENGTH, &pos); int probabilityFieldPos = NOT_A_DICT_POS;
} if (PatriciaTrieReadingUtils::isTerminal(flags)) {
if (isTerminal()) { probabilityFieldPos = pos;
mProbabilityFieldPos = pos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mProbabilityFieldPos += mBuffer->getOriginalBufferSize(); probabilityFieldPos += mBuffer->getOriginalBufferSize();
} }
mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
} else {
mProbabilityFieldPos = NOT_A_DICT_POS;
mProbability = NOT_A_PROBABILITY;
} }
mChildrenPosFieldPos = pos; int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
mChildrenPosFieldPos += mBuffer->getOriginalBufferSize(); childrenPosFieldPos += mBuffer->getOriginalBufferSize();
} }
mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
dictBuf, &pos); dictBuf, &pos);
if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
mChildrenPos += mBuffer->getOriginalBufferSize(); childrenPos += mBuffer->getOriginalBufferSize();
} }
if (mSiblingPos == NOT_A_DICT_POS) { int newBigramLinkedNodePos = bigramLinkedNodePos;
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { if (siblingNodePos == NOT_A_DICT_POS) {
mBigramLinkedNodePos = mChildrenPos; if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
} else { newBigramLinkedNodePos = childrenPos;
mBigramLinkedNodePos = NOT_A_DICT_POS;
} }
} }
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize(); pos += mBuffer->getOriginalBufferSize();
} }
if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { int shortcutsPos = NOT_A_DICT_POS;
mShortcutPos = pos; if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
shortcutsPos = pos;
mShortcutsPolicy->skipAllShortcuts(&pos); mShortcutsPolicy->skipAllShortcuts(&pos);
} else {
mShortcutPos = NOT_A_DICT_POS;
} }
if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { int bigramsPos = NOT_A_DICT_POS;
mBigramPos = pos; if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
bigramsPos = pos;
mBigramsPolicy->skipAllBigrams(&pos); mBigramsPolicy->skipAllBigrams(&pos);
} else {
mBigramPos = NOT_A_DICT_POS;
} }
// Update siblingPos if needed. int newSiblingNodePos = siblingNodePos;
if (mSiblingPos == NOT_A_DICT_POS) { if (siblingNodePos == NOT_A_DICT_POS) {
// Sibling position is the tail position of current node. // Sibling position is the tail position of current node.
mSiblingPos = pos; newSiblingNodePos = pos;
} }
// Read destination node if the read node is a moved node. // Read destination node if the read node is a moved node.
if (DynamicPatriciaTrieReadingUtils::isMoved(mFlags)) { if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
// The destination position is stored at the same place as the parent position. // The destination position is stored at the same place as the parent position.
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(mParentPos, maxCodePointCount, return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
outCodePoints); newBigramLinkedNodePos);
} else {
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos);
} }
} }
void DynamicPatriciaTrieNodeReader::invalidatePtNodeInfo() {
mHeadPos = NOT_A_DICT_POS;
mFlags = 0;
mParentPos = NOT_A_DICT_POS;
mCodePointCount = 0;
mProbabilityFieldPos = NOT_A_DICT_POS;
mProbability = NOT_A_PROBABILITY;
mChildrenPosFieldPos = NOT_A_DICT_POS;
mChildrenPos = NOT_A_DICT_POS;
mBigramLinkedNodePos = NOT_A_DICT_POS;
mShortcutPos = NOT_A_DICT_POS;
mBigramPos = NOT_A_DICT_POS;
mSiblingPos = NOT_A_DICT_POS;
}
} }

View file

@ -20,8 +20,8 @@
#include <stdint.h> #include <stdint.h>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime { namespace latinime {
@ -33,106 +33,19 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
* node and reads node attributes. * node and reads node attributes.
*/ */
class DynamicPatriciaTrieNodeReader { class DynamicPatriciaTrieNodeReader : public PtNodeReader {
public: public:
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy), : mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy), mHeadPos(NOT_A_DICT_POS), mFlags(0), mShortcutsPolicy(shortcutsPolicy) {}
mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbabilityFieldPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mChildrenPosFieldPos(NOT_A_DICT_POS),
mChildrenPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS),
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
mSiblingPos(NOT_A_DICT_POS) {}
~DynamicPatriciaTrieNodeReader() {} ~DynamicPatriciaTrieNodeReader() {}
// Reads PtNode information from dictionary buffer and updates members with the information. virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) { return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(ptNodePos , NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
0 /* maxCodePointCount */, 0 /* outCodePoints */);
}
AK_FORCE_INLINE void fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
mSiblingPos = NOT_A_DICT_POS;
mBigramLinkedNodePos = NOT_A_DICT_POS;
fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, maxCodePointCount, outCodePoints);
}
// HeadPos is different from NodePos when the current PtNode is a moved PtNode.
AK_FORCE_INLINE int getHeadPos() const {
return mHeadPos;
}
// Flags
AK_FORCE_INLINE bool isDeleted() const {
return DynamicPatriciaTrieReadingUtils::isDeleted(mFlags);
}
AK_FORCE_INLINE bool hasChildren() const {
return mChildrenPos != NOT_A_DICT_POS;
}
AK_FORCE_INLINE bool isTerminal() const {
return PatriciaTrieReadingUtils::isTerminal(mFlags);
}
AK_FORCE_INLINE bool isBlacklisted() const {
return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
}
AK_FORCE_INLINE bool isNotAWord() const {
return PatriciaTrieReadingUtils::isNotAWord(mFlags);
}
// Parent node position
AK_FORCE_INLINE int getParentPos() const {
return mParentPos;
}
// Number of code points
AK_FORCE_INLINE uint8_t getCodePointCount() const {
return mCodePointCount;
}
// Probability
AK_FORCE_INLINE int getProbabilityFieldPos() const {
return mProbabilityFieldPos;
}
AK_FORCE_INLINE int getProbability() const {
return mProbability;
}
// Children PtNode array position
AK_FORCE_INLINE int getChildrenPosFieldPos() const {
return mChildrenPosFieldPos;
}
AK_FORCE_INLINE int getChildrenPos() const {
return mChildrenPos;
}
// Bigram linked node position.
AK_FORCE_INLINE int getBigramLinkedNodePos() const {
return mBigramLinkedNodePos;
}
// Shortcutlist position
AK_FORCE_INLINE int getShortcutPos() const {
return mShortcutPos;
}
// Bigrams position
AK_FORCE_INLINE int getBigramsPos() const {
return mBigramPos;
}
// Sibling node position
AK_FORCE_INLINE int getSiblingNodePos() const {
return mSiblingPos;
} }
private: private:
@ -141,23 +54,9 @@ class DynamicPatriciaTrieNodeReader {
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
int mHeadPos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos;
uint8_t mCodePointCount;
int mProbabilityFieldPos;
int mProbability;
int mChildrenPosFieldPos;
int mChildrenPos;
int mBigramLinkedNodePos;
int mShortcutPos;
int mBigramPos;
int mSiblingPos;
void fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int maxCodePointCount, int *const outCodePoints); const int siblingNodePos, const int bigramLinkedNodePos) const;
void invalidatePtNodeInfo();
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */

View file

@ -50,24 +50,27 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *cons
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
bool isTerminal = nodeReader->isTerminal() && !nodeReader->isDeleted(); const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy.isDecayingDict()) { if (isTerminal && mHeaderPolicy.isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode. // valid terminal DicNode.
isTerminal = getProbability(nodeReader->getProbability(), NOT_A_PROBABILITY) isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
!= NOT_A_PROBABILITY; != NOT_A_PROBABILITY;
} }
childDicNodes->pushLeavingChild(dicNode, nodeReader->getHeadPos(), childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
nodeReader->getChildrenPos(), nodeReader->getProbability(), isTerminal, ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
nodeReader->hasChildren(), nodeReader->isBlacklisted() || nodeReader->isNotAWord(), ptNodeParams.hasChildren(),
nodeReader->getCodePointCount(), readingHelper.getMergedNodeCodePoints()); ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
readingHelper.readNextSiblingNode(); ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
readingHelper.readNextSiblingNode(ptNodeParams);
} }
} }
@ -77,29 +80,33 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
// This method traverses parent nodes from the terminal by following parent pointers; thus, // This method traverses parent nodes from the terminal by following parent pointers; thus,
// node code points are stored in the buffer in the reverse order. // node code points are stored in the buffer in the reverse order.
int reverseCodePoints[maxCodePointCount]; int reverseCodePoints[maxCodePointCount];
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
// First, read the terminal node and get its probability. // First, read the terminal node and get its probability.
readingHelper.initWithPtNodePos(ptNodePos); readingHelper.initWithPtNodePos(ptNodePos);
if (!readingHelper.isValidTerminalNode()) {
const PtNodeParams terminalPtNodeParams(readingHelper.getPtNodeParams());
if (!readingHelper.isValidTerminalNode(terminalPtNodeParams)) {
// Node at the ptNodePos is not a valid terminal node. // Node at the ptNodePos is not a valid terminal node.
*outUnigramProbability = NOT_A_PROBABILITY; *outUnigramProbability = NOT_A_PROBABILITY;
return 0; return 0;
} }
// Store terminal node probability. // Store terminal node probability.
*outUnigramProbability = readingHelper.getNodeReader()->getProbability(); *outUnigramProbability = terminalPtNodeParams.getProbability();
// Then, following parent node link to the dictionary root and fetch node code points. // Then, following parent node link to the dictionary root and fetch node code points.
int totalCodePointCount = 0;
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
if (readingHelper.getTotalCodePointCount() > maxCodePointCount) { const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
totalCodePointCount = readingHelper.getTotalCodePointCount(ptNodeParams);
if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
// The ptNodePos is not a valid terminal node position in the dictionary. // The ptNodePos is not a valid terminal node position in the dictionary.
*outUnigramProbability = NOT_A_PROBABILITY; *outUnigramProbability = NOT_A_PROBABILITY;
return 0; return 0;
} }
// Store node code points to buffer in the reverse order. // Store node code points to buffer in the reverse order.
readingHelper.fetchMergedNodeCodePointsInReverseOrder( readingHelper.fetchMergedNodeCodePointsInReverseOrder(ptNodeParams,
readingHelper.getPrevTotalCodePointCount(), reverseCodePoints); readingHelper.getPrevTotalCodePointCount(), reverseCodePoints);
// Follow parent node toward the root node. // Follow parent node toward the root node.
readingHelper.readParentNode(); readingHelper.readParentNode(ptNodeParams);
} }
if (readingHelper.isError()) { if (readingHelper.isError()) {
// The node position or the dictionary is invalid. // The node position or the dictionary is invalid.
@ -107,11 +114,10 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
return 0; return 0;
} }
// Reverse the stored code points to output them. // Reverse the stored code points to output them.
const int codePointCount = readingHelper.getTotalCodePointCount(); for (int i = 0; i < totalCodePointCount; ++i) {
for (int i = 0; i < codePointCount; ++i) { outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
outCodePoints[i] = reverseCodePoints[codePointCount - i - 1];
} }
return codePointCount; return totalCodePointCount;
} }
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
@ -120,39 +126,42 @@ int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount(); const int matchedCodePointCount = readingHelper.getPrevTotalCodePointCount();
if (readingHelper.getTotalCodePointCount() > length if (readingHelper.getTotalCodePointCount(ptNodeParams) > length
|| !readingHelper.isMatchedCodePoint(0 /* index */, || !readingHelper.isMatchedCodePoint(ptNodeParams, 0 /* index */,
searchCodePoints[matchedCodePointCount])) { searchCodePoints[matchedCodePointCount])) {
// Current node has too many code points or its first code point is different from // Current node has too many code points or its first code point is different from
// target code point. Skip this node and read the next sibling node. // target code point. Skip this node and read the next sibling node.
readingHelper.readNextSiblingNode(); readingHelper.readNextSiblingNode(ptNodeParams);
continue; continue;
} }
// Check following merged node code points. // Check following merged node code points.
const int nodeCodePointCount = nodeReader->getCodePointCount(); const int nodeCodePointCount = ptNodeParams.getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) { for (int j = 1; j < nodeCodePointCount; ++j) {
if (!readingHelper.isMatchedCodePoint( if (!readingHelper.isMatchedCodePoint(ptNodeParams,
j, searchCodePoints[matchedCodePointCount + j])) { j, searchCodePoints[matchedCodePointCount + j])) {
// Different code point is found. The given word is not included in the dictionary. // Different code point is found. The given word is not included in the dictionary.
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
} }
// All characters are matched. // All characters are matched.
if (length == readingHelper.getTotalCodePointCount()) { if (length == readingHelper.getTotalCodePointCount(ptNodeParams)) {
// Terminal position is found. // Terminal position is found.
return nodeReader->getHeadPos(); return ptNodeParams.getHeadPos();
} }
if (!nodeReader->hasChildren()) { if (!ptNodeParams.hasChildren()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
// Advance to the children nodes. // Advance to the children nodes.
readingHelper.readChildNode(); readingHelper.readChildNode(ptNodeParams);
} }
// If we already traversed the tree further than the word is long, there means // If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it). // there was no match (or we would have found it).
@ -179,39 +188,33 @@ int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
return getProbability(nodeReader.getProbability(), NOT_A_PROBABILITY); return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
} }
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); if (ptNodeParams.isDeleted()) {
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
return nodeReader.getShortcutPos(); return ptNodeParams.getShortcutPos();
} }
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(&mBufferWithExtendableBuffer, const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
getBigramsStructurePolicy(), getShortcutsStructurePolicy()); if (ptNodeParams.isDeleted()) {
nodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
return nodeReader.getBigramsPos(); return ptNodeParams.getBigramsPos();
} }
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length, bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
@ -225,8 +228,7 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
AKLOGE("The dictionary is too large to dynamically update."); AKLOGE("The dictionary is too large to dynamically update.");
return false; return false;
} }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());

View file

@ -22,6 +22,7 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -44,6 +45,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mShortcutListPolicy(&mBufferWithExtendableBuffer), mShortcutListPolicy(&mBufferWithExtendableBuffer),
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
mHeaderPolicy.isDecayingDict()), mHeaderPolicy.isDecayingDict()),
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
mUnigramCount(mHeaderPolicy.getUnigramCount()), mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
@ -114,6 +116,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
BufferWithExtendableBuffer mBufferWithExtendableBuffer; BufferWithExtendableBuffer mBufferWithExtendableBuffer;
DynamicShortcutListPolicy mShortcutListPolicy; DynamicShortcutListPolicy mShortcutListPolicy;
DynamicBigramListPolicy mBigramListPolicy; DynamicBigramListPolicy mBigramListPolicy;
DynamicPatriciaTrieNodeReader mNodeReader;
int mUnigramCount; int mUnigramCount;
int mBigramCount; int mBigramCount;
int mNeedsToDecayForTesting; int mNeedsToDecayForTesting;

View file

@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
namespace latinime { namespace latinime {
@ -37,22 +39,26 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
return false; return false;
} }
while (!isEnd()) { while (!isEnd()) {
const PtNodeParams ptNodeParams(getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
if (!alreadyVisitedChildren) { if (!alreadyVisitedChildren) {
if (mNodeReader.hasChildren()) { if (ptNodeParams.hasChildren()) {
// Move to the first child. // Move to the first child.
if (!listener->onDescend(mNodeReader.getChildrenPos())) { if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false; return false;
} }
pushReadingStateToStack(); pushReadingStateToStack();
readChildNode(); readChildNode(ptNodeParams);
} else { } else {
alreadyVisitedChildren = true; alreadyVisitedChildren = true;
} }
} else { } else {
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false; return false;
} }
readNextSiblingNode(); readNextSiblingNode(ptNodeParams);
if (isEnd()) { if (isEnd()) {
// All PtNodes in current linked PtNode arrays have been visited. // All PtNodes in current linked PtNode arrays have been visited.
// Return to the parent. // Return to the parent.
@ -101,10 +107,14 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
} }
pushReadingStateToStack(); pushReadingStateToStack();
while (!isEnd()) { while (!isEnd()) {
const PtNodeParams ptNodeParams(getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
if (alreadyVisitedAllPtNodesInArray) { if (alreadyVisitedAllPtNodesInArray) {
if (alreadyVisitedChildren) { if (alreadyVisitedChildren) {
// Move to next sibling PtNode's children. // Move to next sibling PtNode's children.
readNextSiblingNode(); readNextSiblingNode(ptNodeParams);
if (isEnd()) { if (isEnd()) {
// Return to the parent PTNode. // Return to the parent PTNode.
if (!listener->onAscend()) { if (!listener->onAscend()) {
@ -120,13 +130,13 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
alreadyVisitedChildren = false; alreadyVisitedChildren = false;
} }
} else { } else {
if (mNodeReader.hasChildren()) { if (ptNodeParams.hasChildren()) {
// Move to the first child. // Move to the first child.
if (!listener->onDescend(mNodeReader.getChildrenPos())) { if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
return false; return false;
} }
pushReadingStateToStack(); pushReadingStateToStack();
readChildNode(); readChildNode(ptNodeParams);
// Push state to return the head of PtNode array. // Push state to return the head of PtNode array.
pushReadingStateToStack(); pushReadingStateToStack();
alreadyVisitedAllPtNodesInArray = false; alreadyVisitedAllPtNodesInArray = false;
@ -136,10 +146,10 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
} }
} }
} else { } else {
if (!listener->onVisitingPtNode(&mNodeReader, mMergedNodeCodePoints)) { if (!listener->onVisitingPtNode(&ptNodeParams)) {
return false; return false;
} }
readNextSiblingNode(); readNextSiblingNode(ptNodeParams);
if (isEnd()) { if (isEnd()) {
if (!listener->onReadingPtNodeArrayTail()) { if (!listener->onReadingPtNodeArrayTail()) {
return false; return false;

View file

@ -21,9 +21,8 @@
#include <vector> #include <vector>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
namespace latinime { namespace latinime {
@ -35,6 +34,7 @@ class DictionaryShortcutsStructurePolicy;
* This class is used for traversing dynamic patricia trie. This class supports iterating nodes and * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
* dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop. * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
*/ */
// TODO: Move to pt_common.
class DynamicPatriciaTrieReadingHelper { class DynamicPatriciaTrieReadingHelper {
public: public:
class TraversingEventListener { class TraversingEventListener {
@ -51,8 +51,7 @@ class DynamicPatriciaTrieReadingHelper {
virtual bool onReadingPtNodeArrayTail() = 0; virtual bool onReadingPtNodeArrayTail() = 0;
// Returns whether the event handling was succeeded or not. // Returns whether the event handling was succeeded or not.
virtual bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
const int *const nodeCodePoints) = 0;
protected: protected:
TraversingEventListener() {}; TraversingEventListener() {};
@ -62,10 +61,9 @@ class DynamicPatriciaTrieReadingHelper {
}; };
DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer, DynamicPatriciaTrieReadingHelper(const BufferWithExtendableBuffer *const buffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy, const PtNodeReader *const ptNodeReader)
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mIsError(false), mReadingState(), mBuffer(buffer), : mIsError(false), mReadingState(), mBuffer(buffer),
mNodeReader(mBuffer, bigramsPolicy, shortcutsPolicy), mReadingStateStack() {} mPtNodeReader(ptNodeReader), mReadingStateStack() {}
~DynamicPatriciaTrieReadingHelper() {} ~DynamicPatriciaTrieReadingHelper() {}
@ -90,9 +88,6 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
nextPtNodeArray(); nextPtNodeArray();
if (!isEnd()) {
fetchPtNodeInfo();
}
} }
} }
@ -110,20 +105,23 @@ class DynamicPatriciaTrieReadingHelper {
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
mReadingStateStack.clear(); mReadingStateStack.clear();
fetchPtNodeInfo();
} }
} }
AK_FORCE_INLINE const DynamicPatriciaTrieNodeReader* getNodeReader() const { AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
return &mNodeReader; if (isEnd()) {
return PtNodeParams();
}
return mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(mReadingState.mPos);
} }
AK_FORCE_INLINE bool isValidTerminalNode() const { AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
return !isEnd() && !mNodeReader.isDeleted() && mNodeReader.isTerminal(); return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
} }
AK_FORCE_INLINE bool isMatchedCodePoint(const int index, const int codePoint) const { AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
return mMergedNodeCodePoints[index] == codePoint; const int codePoint) const {
return ptNodeParams.getCodePoints()[index] == codePoint;
} }
// Return code point count exclude the last read node's code points. // Return code point count exclude the last read node's code points.
@ -132,68 +130,56 @@ class DynamicPatriciaTrieReadingHelper {
} }
// Return code point count include the last read node's code points. // Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount() const { AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
return mReadingState.mTotalCodePointCountSinceInitialization return mReadingState.mTotalCodePointCountSinceInitialization
+ mNodeReader.getCodePointCount(); + ptNodeParams.getCodePointCount();
} }
AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder( AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
const int index, int *const outCodePoints) const { const int index, int *const outCodePoints) const {
const int nodeCodePointCount = mNodeReader.getCodePointCount(); const int nodeCodePointCount = ptNodeParams.getCodePointCount();
const int *const nodeCodePoints = ptNodeParams.getCodePoints();
for (int i = 0; i < nodeCodePointCount; ++i) { for (int i = 0; i < nodeCodePointCount; ++i) {
outCodePoints[index + i] = mMergedNodeCodePoints[nodeCodePointCount - 1 - i]; outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
} }
} }
AK_FORCE_INLINE const int *getMergedNodeCodePoints() const { AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
return mMergedNodeCodePoints;
}
AK_FORCE_INLINE void readNextSiblingNode() {
mReadingState.mRemainingPtNodeCountInThisArray -= 1; mReadingState.mRemainingPtNodeCountInThisArray -= 1;
mReadingState.mPos = mNodeReader.getSiblingNodePos(); mReadingState.mPos = ptNodeParams.getSiblingNodePos();
if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
// All nodes in the current node array have been read. // All nodes in the current node array have been read.
followForwardLink(); followForwardLink();
if (!isEnd()) {
fetchPtNodeInfo();
}
} else {
fetchPtNodeInfo();
} }
} }
// Read the first child node of the current node. // Read the first child node of the current node.
AK_FORCE_INLINE void readChildNode() { AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
if (mNodeReader.hasChildren()) { if (ptNodeParams.hasChildren()) {
mReadingState.mTotalCodePointCountSinceInitialization += mReadingState.mTotalCodePointCountSinceInitialization +=
mNodeReader.getCodePointCount(); ptNodeParams.getCodePointCount();
mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
mReadingState.mPos = mNodeReader.getChildrenPos(); mReadingState.mPos = ptNodeParams.getChildrenPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
// Read children node array. // Read children node array.
nextPtNodeArray(); nextPtNodeArray();
if (!isEnd()) {
fetchPtNodeInfo();
}
} else { } else {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
} }
// Read the parent node of the current node. // Read the parent node of the current node.
AK_FORCE_INLINE void readParentNode() { AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
if (mNodeReader.getParentPos() != NOT_A_DICT_POS) { if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
mReadingState.mTotalCodePointCountSinceInitialization += mReadingState.mTotalCodePointCountSinceInitialization +=
mNodeReader.getCodePointCount(); ptNodeParams.getCodePointCount();
mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
mReadingState.mRemainingPtNodeCountInThisArray = 1; mReadingState.mRemainingPtNodeCountInThisArray = 1;
mReadingState.mPos = mNodeReader.getParentPos(); mReadingState.mPos = ptNodeParams.getParentPos();
mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
fetchPtNodeInfo();
} else { } else {
mReadingState.mPos = NOT_A_DICT_POS; mReadingState.mPos = NOT_A_DICT_POS;
} }
@ -207,12 +193,6 @@ class DynamicPatriciaTrieReadingHelper {
return mReadingState.mPosOfThisPtNodeArrayHead; return mReadingState.mPosOfThisPtNodeArrayHead;
} }
AK_FORCE_INLINE void reloadCurrentPtNodeInfo() {
if (!isEnd()) {
fetchPtNodeInfo();
}
}
bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
@ -253,24 +233,13 @@ class DynamicPatriciaTrieReadingHelper {
bool mIsError; bool mIsError;
PtNodeReadingState mReadingState; PtNodeReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer; const BufferWithExtendableBuffer *const mBuffer;
DynamicPatriciaTrieNodeReader mNodeReader; const PtNodeReader *const mPtNodeReader;
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
std::vector<PtNodeReadingState> mReadingStateStack; std::vector<PtNodeReadingState> mReadingStateStack;
void nextPtNodeArray(); void nextPtNodeArray();
void followForwardLink(); void followForwardLink();
AK_FORCE_INLINE void fetchPtNodeInfo() {
mNodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(mReadingState.mPos,
MAX_WORD_LENGTH, mMergedNodeCodePoints);
if (mNodeReader.getCodePointCount() <= 0) {
// Empty node is not allowed.
mIsError = true;
mReadingState.mPos = NOT_A_DICT_POS;
}
}
AK_FORCE_INLINE void pushReadingStateToStack() { AK_FORCE_INLINE void pushReadingStateToStack() {
if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
@ -288,9 +257,6 @@ class DynamicPatriciaTrieReadingHelper {
} else { } else {
mReadingState = mReadingStateStack.back(); mReadingState = mReadingStateStack.back();
mReadingStateStack.pop_back(); mReadingStateStack.pop_back();
if (!isEnd()) {
fetchPtNodeInfo();
}
} }
} }
}; };

View file

@ -41,24 +41,26 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
bool *const outAddedNewUnigram) { bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS; int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) { while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
if (!readingHelper->isMatchedCodePoint(0 /* index */, if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
wordCodePoints[matchedCodePointCount])) { wordCodePoints[matchedCodePointCount])) {
// The first code point is different from target code point. Skip this node and read // The first code point is different from target code point. Skip this node and read
// the next sibling node. // the next sibling node.
readingHelper->readNextSiblingNode(); readingHelper->readNextSiblingNode(ptNodeParams);
continue; continue;
} }
// Check following merged node code points. // Check following merged node code points.
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader(); const int nodeCodePointCount = ptNodeParams.getCodePointCount();
const int nodeCodePointCount = nodeReader->getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) { for (int j = 1; j < nodeCodePointCount; ++j) {
const int nextIndex = matchedCodePointCount + j; const int nextIndex = matchedCodePointCount + j;
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(nodeReader, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j,
readingHelper->getMergedNodeCodePoints(), j,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
probability), probability),
wordCodePoints + matchedCodePointCount, wordCodePoints + matchedCodePointCount,
@ -66,20 +68,19 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
} }
} }
// All characters are matched. // All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount()) { if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(nodeReader, probability, return setPtNodeProbability(&ptNodeParams, probability, outAddedNewUnigram);
readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
} }
if (!nodeReader->hasChildren()) { if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
wordCodePoints + readingHelper->getTotalCodePointCount(), wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount()); codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
} }
// Advance to the children nodes. // Advance to the children nodes.
parentPos = nodeReader->getHeadPos(); parentPos = ptNodeParams.getHeadPos();
readingHelper->readChildNode(); readingHelper->readChildNode(ptNodeParams);
} }
if (readingHelper->isError()) { if (readingHelper->isError()) {
// The dictionary is invalid. // The dictionary is invalid.
@ -95,26 +96,24 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
const int probability, bool *const outAddedNewBigram) { const int probability, bool *const outAddedNewBigram) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
mMergedNodeCodePoints);
// Move node to add bigram entry. // Move node to add bigram entry.
const int newNodePos = mBuffer->getTailPosition(); const int newNodePos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) {
return false; return false;
} }
int writingPos = newNodePos; int writingPos = newNodePos;
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(),
mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
&writingPos)) { ptNodeParams.getProbability(), &writingPos)) {
return false; return false;
} }
nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos));
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list. // Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos(); int bigramListPos = newPtNodeParams.getBigramsPos();
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
outAddedNewBigram); outAddedNewBigram);
} else { } else {
@ -126,10 +125,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
} }
// Then, Mark as the PtNode having bigram list in the flags. // Then, Mark as the PtNode having bigram list in the flags.
const PatriciaTrieReadingUtils::NodeFlags updatedFlags = const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, newPtNodeParams.isNotAWord(),
nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
writingPos = newNodePos; writingPos = newNodePos;
// Write updated flags into the moved PtNode's flags field. // Write updated flags into the moved PtNode's flags field.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
@ -140,11 +140,11 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
// Remove a bigram relation from word0Pos to word1Pos. // Remove a bigram relation from word0Pos to word1Pos.
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
return false; return false;
} }
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); return mBigramPolicy->removeBigram(ptNodeParams.getBigramsPos(), word1Pos);
} }
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
@ -181,8 +181,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
} }
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { const PtNodeParams *const toBeUpdatedPtNodeParams) {
int pos = nodeToUpdate->getHeadPos(); int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
@ -194,16 +194,16 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags = const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */); true /* isDeleted */);
int writingPos = nodeToUpdate->getHeadPos(); int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags. // Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos); &writingPos);
} }
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos,
const int bigramLinkedNodePos) { const int bigramLinkedNodePos) {
int pos = originalNode->getHeadPos(); int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
@ -215,7 +215,7 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
const PatriciaTrieReadingUtils::NodeFlags updatedFlags = const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */); false /* isDeleted */);
int writingPos = originalNode->getHeadPos(); int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags. // Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
&writingPos)) { &writingPos)) {
@ -223,31 +223,32 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
} }
// Update moved position, which is stored in the parent offset field. // Update moved position, which is stored in the parent offset field.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
return false; return false;
} }
// Update bigram linked node position, which is stored in the children position field. // Update bigram linked node position, which is stored in the children position field.
int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
return false; return false;
} }
if (originalNode->hasChildren()) { if (toBeUpdatedPtNodeParams->hasChildren()) {
// Update children's parent position. // Update children's parent position.
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
while (!readingHelper.isEnd()) { while (!readingHelper.isEnd()) {
int parentOffsetFieldPos = nodeReader->getHeadPos() const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams());
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
&parentOffsetFieldPos)) { &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus, // Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary. // we give up to update dictionary.
return false; return false;
} }
readingHelper.readNextSiblingNode(); readingHelper.readNextSiblingNode(childPtNodeParams);
} }
} }
return true; return true;
@ -333,13 +334,13 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
BufferWithExtendableBuffer *const bufferToWrite, BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const PtNodeParams *const originalPtNodeParams, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability, const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos) { int *const writingPos) {
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(), return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(),
originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability,
originalNode->getChildrenPos(), originalNode->getBigramsPos(), originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(),
originalNode->getShortcutPos(), writingPos); originalPtNodeParams->getShortcutPos(), writingPos);
} }
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
@ -355,14 +356,14 @@ bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const
} }
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, const PtNodeParams *const originalPtNodeParams, const int probability,
const int *const codePoints, bool *const outAddedNewUnigram) { bool *const outAddedNewUnigram) {
if (originalPtNode->isTerminal()) { if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability. // Overwrites the probability.
*outAddedNewUnigram = false; *outAddedNewUnigram = false;
const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), const int probabilityToWrite = getUpdatedProbability(
probability); originalPtNodeParams->getProbability(), probability);
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
probabilityToWrite, &probabilityFieldPos)) { probabilityToWrite, &probabilityFieldPos)) {
return false; return false;
@ -371,11 +372,12 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
// Make the node terminal and write the probability. // Make the node terminal and write the probability.
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
int movedPos = mBuffer->getTailPosition(); int movedPos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) {
return false; return false;
} }
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams,
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(),
originalPtNodeParams->getCodePointCount(),
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
&movedPos)) { &movedPos)) {
return false; return false;
@ -385,15 +387,15 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
} }
bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, const PtNodeParams *const parentPtNodeParams, const int probability,
const int *const codePoints, const int codePointCount) { const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
int childrenPosFieldPos = parentNode->getChildrenPosFieldPos(); int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, &childrenPosFieldPos)) { newPtNodeArrayPos, &childrenPosFieldPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, probability); codePointCount, probability);
} }
@ -418,8 +420,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary updating was succeeded or not. // Returns whether the dictionary updating was succeeded or not.
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount) { const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child. // When addsExtraChild is true, split the reallocating PtNode and add new child.
@ -435,8 +436,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later // Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position. // with actual children position.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(),
reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability,
&writingPos)) { &writingPos)) {
return false; return false;
} }
@ -449,11 +450,11 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
// Write the 2nd part of the reallocating node. // Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos; const int secondPartOfReallocatedPtNodePos = writingPos;
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams,
firstPartOfReallocatedPtNodePos, firstPartOfReallocatedPtNodePos,
reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) { reallocatingPtNodeParams->getProbability(), &writingPos)) {
return false; return false;
} }
if (addsExtraChild) { if (addsExtraChild) {
@ -468,16 +469,17 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false; return false;
} }
// Update original reallocatingPtNode as moved. // Update original reallocating PtNode as moved.
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
secondPartOfReallocatedPtNodePos)) { secondPartOfReallocatedPtNodePos)) {
return false; return false;
} }
// Load node info. Information of the 1st part will be fetched. // Load node info. Information of the 1st part will be fetched.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); const PtNodeParams ptNodeParams(
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
// Update children position. // Update children position.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
actualChildrenPos, &childrenPosFieldPos)) { actualChildrenPos, &childrenPosFieldPos)) {
return false; return false;
@ -488,7 +490,8 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
int *const outUnigramCount, int *const outBigramCount) { int *const outUnigramCount, int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
@ -530,9 +533,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
mNeedsToDecay); mNeedsToDecay);
// Create reading helper for the GCed dictionary. // Create reading node reader and reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy); &newDictShortcutPolicy);
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,

View file

@ -26,11 +26,12 @@ namespace latinime {
class BufferWithExtendableBuffer; class BufferWithExtendableBuffer;
class DynamicBigramListPolicy; class DynamicBigramListPolicy;
class DynamicPatriciaTrieNodeReader;
class DynamicPatriciaTrieReadingHelper; class DynamicPatriciaTrieReadingHelper;
class DynamicShortcutListPolicy; class DynamicShortcutListPolicy;
class HeaderPolicy; class HeaderPolicy;
class PtNodeParams;
// TODO: Make it independent from a particular format and move to pt_common.
class DynamicPatriciaTrieWritingHelper { class DynamicPatriciaTrieWritingHelper {
public: public:
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
@ -77,12 +78,12 @@ class DynamicPatriciaTrieWritingHelper {
// CAVEAT: This method must be called only from inner classes of // CAVEAT: This method must be called only from inner classes of
// DynamicPatriciaTrieGcEventListeners. // DynamicPatriciaTrieGcEventListeners.
bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
// CAVEAT: This method must be called only from this class or inner classes of // CAVEAT: This method must be called only from this class or inner classes of
// DynamicPatriciaTrieGcEventListeners. // DynamicPatriciaTrieGcEventListeners.
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const PtNodeParams *const originalPtNodeParams, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability, const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos); int *const writingPos);
@ -96,7 +97,7 @@ class DynamicPatriciaTrieWritingHelper {
DynamicShortcutListPolicy *const mShortcutPolicy; DynamicShortcutListPolicy *const mShortcutPolicy;
const bool mNeedsToDecay; const bool mNeedsToDecay;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos); const int movedPos, const int bigramLinkedNodePos);
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
@ -112,19 +113,17 @@ class DynamicPatriciaTrieWritingHelper {
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability,
const int probability, const int *const codePoints, bool *const outAddedNewUnigram); bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode( bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, const int probability, const int *const codePoints, const int codePointCount);
const int *const codePoints, const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability); const int nodeCodePointCount, const int probability);
bool reallocatePtNodeAndAddNewPtNodes( bool reallocatePtNodeAndAddNewPtNodes(
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount); const int newNodeCodePointCount);

View file

@ -30,4 +30,6 @@ const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".sh
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
".shortcut_index_shortcut"; ".shortcut_index_shortcut";
const int Ver4DictConstants::NOT_A_TERMINAL = -1;
} // namespace latinime } // namespace latinime

View file

@ -34,6 +34,8 @@ class Ver4DictConstants {
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
static const int NOT_A_TERMINAL;
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
}; };