From 0be85cfdf78af4fdd1a825e5cf80481a09f0a60d Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 6 Nov 2013 19:22:34 +0900 Subject: [PATCH] Refactoring: PtNodeWriter to separate version specific writing methods. Bug: 11073222 Change-Id: Id2ade16316f33e17ead5faf45404a2054c303d74 --- native/jni/Android.mk | 1 + .../structure/pt_common/pt_node_params.h | 32 ++ .../structure/pt_common/pt_node_writer.h | 56 ++++ ...namic_patricia_trie_gc_event_listeners.cpp | 8 +- ...dynamic_patricia_trie_gc_event_listeners.h | 22 +- .../v3/dynamic_patricia_trie_node_writer.cpp | 227 +++++++++++++ .../v3/dynamic_patricia_trie_node_writer.h | 79 +++++ .../v3/dynamic_patricia_trie_policy.cpp | 20 +- .../v3/dynamic_patricia_trie_policy.h | 4 + .../dynamic_patricia_trie_writing_helper.cpp | 317 ++++-------------- .../v3/dynamic_patricia_trie_writing_helper.h | 43 +-- 11 files changed, 509 insertions(+), 300 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 333688015..99a479e22 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -80,6 +80,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v3/, \ dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_node_reader.cpp \ + dynamic_patricia_trie_node_writer.cpp \ dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_utils.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index edc7b954c..6a4cfee3c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -86,6 +86,38 @@ class PtNodeParams { memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); } + // Construct new params by updating existing PtNode params. + PtNodeParams(const PtNodeParams *const ptNodeParams, + const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) + : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), + mTerminalId(ptNodeParams->getTerminalId()), + mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), + mProbability(probability), + mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()), + mChildrenPos(ptNodeParams->getChildrenPos()), + mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()), + mShortcutPos(ptNodeParams->getShortcutPos()), + mBigramPos(ptNodeParams->getBigramsPos()), + mSiblingPos(ptNodeParams->getSiblingNodePos()) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + + PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) + : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mParentPos(parentPos), + mCodePointCount(codePointCount), mCodePoints(), + mTerminalIdFieldPos(NOT_A_DICT_POS), + mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), + mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), + mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), + mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), + mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) { + memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); + } + AK_FORCE_INLINE bool isValid() const { return mCodePointCount > 0; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h new file mode 100644 index 000000000..c9eff3d80 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_PT_NODE_WRITER_H +#define LATINIME_PT_NODE_WRITER_H + +#include "defines.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" + +namespace latinime { + +// Interface class used to write PtNode information. +class PtNodeWriter { + public: + virtual ~PtNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) = 0; + + virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int probability) = 0; + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition) = 0; + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos) = 0; + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram) = 0; + + protected: + PtNodeWriter() {}; + + private: + DISALLOW_COPY_AND_ASSIGN(PtNodeWriter); +}; +} // namespace latinime +#endif /* LATINIME_PT_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index db4e86da1..a252c6fb2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -18,6 +18,8 @@ #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" namespace latinime { @@ -54,7 +56,7 @@ bool DynamicPatriciaTrieGcEventListeners } if (isUselessPtNode) { // Current PtNode is no longer needed. Mark it as deleted. - if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) { + if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) { return false; } } else { @@ -130,9 +132,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo ptNodeParams->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. - return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams, - ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(), - ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos); + return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos); } bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h index cfe3c145c..a0b3b4d3a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h @@ -23,13 +23,13 @@ #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" -#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "utils/hash_map_compat.h" namespace latinime { class DictionaryHeaderStructurePolicy; +class PtNodeWriter; class PtNodeParams; class DynamicPatriciaTrieGcEventListeners { @@ -42,9 +42,9 @@ class DynamicPatriciaTrieGcEventListeners { public: TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( const DictionaryHeaderStructurePolicy *const headerPolicy, - DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const buffer, const bool isDecayingDict) - : mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer), + PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer, + const bool isDecayingDict) + : mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer), mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {} @@ -78,7 +78,7 @@ class DynamicPatriciaTrieGcEventListeners { TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); const DictionaryHeaderStructurePolicy *const mHeaderPolicy; - DynamicPatriciaTrieWritingHelper *const mWritingHelper; + PtNodeWriter *const mPtNodeWriter; BufferWithExtendableBuffer *const mBuffer; const bool mIsDecayingDict; std::vector mValueStack; @@ -118,11 +118,10 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToPlaceAndWriteValidPtNodesToBuffer( - DynamicPatriciaTrieWritingHelper *const writingHelper, - BufferWithExtendableBuffer *const bufferToWrite, + PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite, DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const dictPositionRelocationMap) - : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), + : mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0), mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; @@ -137,7 +136,7 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); - DynamicPatriciaTrieWritingHelper *const mWritingHelper; + PtNodeWriter *const mPtNodeWriter; BufferWithExtendableBuffer *const mBufferToWrite; DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const mDictPositionRelocationMap; @@ -149,13 +148,11 @@ class DynamicPatriciaTrieGcEventListeners { : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: TraversePolicyToUpdateAllPositionFields( - DynamicPatriciaTrieWritingHelper *const writingHelper, DynamicBigramListPolicy *const bigramPolicy, BufferWithExtendableBuffer *const bufferToWrite, const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const dictPositionRelocationMap) - : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), - mBufferToWrite(bufferToWrite), + : mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite), mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0), mBigramCount(0) {}; @@ -178,7 +175,6 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); - DynamicPatriciaTrieWritingHelper *const mWritingHelper; DynamicBigramListPolicy *const mBigramPolicy; BufferWithExtendableBuffer *const mBufferToWrite; const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp new file mode 100644 index 000000000..890606f89 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.cpp @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" + +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; + +bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted( + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + true /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); +} + +bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent offset field. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->hasChildren()) { + // Update children's parent position. + mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); + while (!mReadingHelper.isEnd()) { + const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), + &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + mReadingHelper.readNextSiblingNode(childPtNodeParams); + } + } + return true; +} + +bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) { + if (!toBeUpdatedPtNodeParams->isTerminal()) { + return false; + } + int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos(); + return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + newProbability, &probabilityFieldPos); +} + +bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) { + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + newChildrenPosition, &childrenPosFieldPos); +} + +bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + // Write probability when the probability is a valid probability, which means this node is + // terminal. + if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { + if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, + ptNodeParams->getProbability(), ptNodeWritingPos)) { + return false; + } + } + // Write children position + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + // Copy shortcut list when the originalShortcutListPos is valid dictionary position. + if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getShortcutPos(); + if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos, + ptNodeWritingPos)) { + return false; + } + } + // Copy bigram list when the originalBigramListPos is valid dictionary position. + int bigramCount = 0; + if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) { + int fromPos = ptNodeParams->getBigramsPos(); + if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) { + return false; + } + } + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), + ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */, + ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, + bigramCount > 0 /* hasBigrams */, + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry( + const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram) { + const int newNodePos = mBuffer->getTailPosition(); + int writingPos = newNodePos; + // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. + if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) { + return false; + } + if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) { + return false; + } + const PtNodeParams newPtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos)); + if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { + // Insert a new bigram entry into the existing bigram list. + int bigramListPos = newPtNodeParams.getBigramsPos(); + return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(), + probability, &bigramListPos, outAddedNewBigram); + } else { + // The PtNode doesn't have a bigram list. + *outAddedNewBigram = true; + // First, Write a bigram entry at the tail position of the PtNode. + if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability, + &writingPos)) { + return false; + } + // Then, Mark as the PtNode having bigram list in the flags. + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), + newPtNodeParams.isNotAWord(), + newPtNodeParams.getProbability() != NOT_A_PROBABILITY, + newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, + newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); + writingPos = newNodePos; + // Write updated flags into the moved PtNode's flags field. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, + &writingPos); + } +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h new file mode 100644 index 000000000..e3a00e94e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H +#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H + +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class DynamicBigramListPolicy; +class DynamicShortcutListPolicy; + +/* + * This class is used for helping to writes nodes of dynamic patricia trie. + */ +class DynamicPatriciaTrieNodeWriter : public PtNodeWriter { + public: + DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer, + const DynamicPatriciaTrieNodeReader *const ptNodeReader, + DynamicBigramListPolicy *const bigramPolicy, + DynamicShortcutListPolicy *const shortcutPolicy) + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + + virtual ~DynamicPatriciaTrieNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos); + + virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newProbability); + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition); + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos); + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram); + + private: + DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mBuffer; + const DynamicPatriciaTrieNodeReader *const mPtNodeReader; + DynamicPatriciaTrieReadingHelper mReadingHelper; + DynamicBigramListPolicy *const mBigramPolicy; + DynamicShortcutListPolicy *const mShortcutPolicy; + +}; +} // namespace latinime +#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp index 205d181e7..0500f23d1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.cpp @@ -152,8 +152,8 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int } DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewUnigram = false; if (writingHelper.addUnigramWord(&readingHelper, word, length, probability, &addedNewUnigram)) { @@ -187,8 +187,8 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int if (word1Pos == NOT_A_DICT_POS) { return false; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); bool addedNewBigram = false; if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { if (addedNewBigram) { @@ -221,8 +221,8 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const if (word1Pos == NOT_A_DICT_POS) { return false; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()); if (writingHelper.removeBigramWords(word0Pos, word1Pos)) { mBigramCount--; return true; @@ -236,8 +236,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary."); return; } - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */); writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } @@ -251,8 +251,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, needsToDecay); - DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, - &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader, + &mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay); writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); mNeedsToDecayForTesting = false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h index 7a81a9c0a..1c490ceb9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -46,6 +47,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict()), mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy), + mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy, + &mShortcutListPolicy), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {} @@ -117,6 +120,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicShortcutListPolicy mShortcutListPolicy; DynamicBigramListPolicy mBigramListPolicy; DynamicPatriciaTrieNodeReader mNodeReader; + DynamicPatriciaTrieNodeWriter mNodeWriter; int mUnigramCount; int mBigramCount; int mNeedsToDecayForTesting; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp index 05caaebac..6a550a1af 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.cpp @@ -17,9 +17,12 @@ #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" @@ -96,51 +99,17 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool *const outAddedNewBigram) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); - // Move node to add bigram entry. - const int newNodePos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) { - return false; - } - int writingPos = newNodePos; - // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(), - ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(), - ptNodeParams.getProbability(), &writingPos)) { - return false; - } - const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos)); - if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) { - // Insert a new bigram entry into the existing bigram list. - int bigramListPos = newPtNodeParams.getBigramsPos(); - return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, - outAddedNewBigram); - } else { - // The PtNode doesn't have a bigram list. - *outAddedNewBigram = true; - // First, Write a bigram entry at the tail position of the PtNode. - if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { - return false; - } - // Then, Mark as the PtNode having bigram list in the flags. - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(), - newPtNodeParams.isNotAWord(), - newPtNodeParams.getProbability() != NOT_A_PROBABILITY, - newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, - newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); - writingPos = newNodePos; - // Write updated flags into the moved PtNode's flags field. - return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos); - } + const PtNodeParams sourcePtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + const PtNodeParams targetPtNodeParams( + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos)); + return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability, + outAddedNewBigram); } // Remove a bigram relation from word0Pos to word1Pos. bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos)); + const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos)); if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) { return false; } @@ -180,169 +149,6 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); } -bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( - const PtNodeParams *const toBeUpdatedPtNodeParams) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, - true /* isDeleted */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. - return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos); -} - -bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( - const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos, - const int bigramLinkedNodePos) { - int pos = toBeUpdatedPtNodeParams->getHeadPos(); - const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); - const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); - if (usesAdditionalBuffer) { - pos -= mBuffer->getOriginalBufferSize(); - } - // Read original flags - const PatriciaTrieReadingUtils::NodeFlags originalFlags = - PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const PatriciaTrieReadingUtils::NodeFlags updatedFlags = - DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, - false /* isDeleted */); - int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); - // Update flags. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, - &writingPos)) { - return false; - } - // Update moved position, which is stored in the parent offset field. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { - return false; - } - // Update bigram linked node position, which is stored in the children position field. - int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( - mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { - return false; - } - if (toBeUpdatedPtNodeParams->hasChildren()) { - // Update children's parent position. - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); - readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); - while (!readingHelper.isEnd()) { - const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams()); - int parentOffsetFieldPos = childPtNodeParams.getHeadPos() - + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( - mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), - &parentOffsetFieldPos)) { - // Parent offset cannot be written because of a bug or a broken dictionary; thus, - // we give up to update dictionary. - return false; - } - readingHelper.readNextSiblingNode(childPtNodeParams); - } - } - return true; -} - -// Write new PtNode at writingPos. -bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( - BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted, - const bool isNotAWord, const int parentPos, const int *const codePoints, - const int codePointCount, const int probability, const int childrenPos, - const int originalBigramListPos, const int originalShortcutListPos, - int *const writingPos) { - const int nodePos = *writingPos; - // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the - // PtNode writing. - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, - 0 /* nodeFlags */, writingPos)) { - return false; - } - // Calculate a parent offset and write the offset. - if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, - parentPos, nodePos, writingPos)) { - return false; - } - // Write code points - if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite, - codePoints, codePointCount, writingPos)) { - return false; - } - // Write probability when the probability is a valid probability, which means this node is - // terminal. - if (probability != NOT_A_PROBABILITY) { - if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite, - probability, writingPos)) { - return false; - } - } - // Write children position - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite, - childrenPos, writingPos)) { - return false; - } - // Copy shortcut list when the originalShortcutListPos is valid dictionary position. - if (originalShortcutListPos != NOT_A_DICT_POS) { - int fromPos = originalShortcutListPos; - if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos, - writingPos)) { - return false; - } - } - // Copy bigram list when the originalBigramListPos is valid dictionary position. - int bigramCount = 0; - if (originalBigramListPos != NOT_A_DICT_POS) { - int fromPos = originalBigramListPos; - if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) { - return false; - } - } - // Create node flags and write them. - PatriciaTrieReadingUtils::NodeFlags nodeFlags = - PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, - probability != NOT_A_PROBABILITY /* isTerminal */, - originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, - bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, - CHILDREN_POSITION_FIELD_SIZE); - int flagsFieldPos = nodePos; - if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags, - &flagsFieldPos)) { - return false; - } - return true; -} - -bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( - BufferWithExtendableBuffer *const bufferToWrite, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */, - false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, - NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, - NOT_A_DICT_POS /* originalShortcutPos */, writingPos); -} - -bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( - BufferWithExtendableBuffer *const bufferToWrite, - const PtNodeParams *const originalPtNodeParams, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos) { - return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(), - originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability, - originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(), - originalPtNodeParams->getShortcutPos(), writingPos); -} - bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos) { @@ -363,23 +169,20 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( *outAddedNewUnigram = false; const int probabilityToWrite = getUpdatedProbability( originalPtNodeParams->getProbability(), probability); - int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, - probabilityToWrite, &probabilityFieldPos)) { - return false; - } + return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite); } else { // Make the node terminal and write the probability. *outAddedNewUnigram = true; - int movedPos = mBuffer->getTailPosition(); - if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) { + const int movedPos = mBuffer->getTailPosition(); + int writingPos = movedPos; + const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, + originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), + originalPtNodeParams->getCodePoints(), + getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability))); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { return false; } - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams, - originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(), - originalPtNodeParams->getCodePointCount(), - getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), - &movedPos)) { + if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { return false; } } @@ -390,9 +193,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( const PtNodeParams *const parentPtNodeParams, const int probability, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); - int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, - newPtNodeArrayPos, &childrenPosFieldPos)) { + if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, @@ -407,8 +208,9 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( 1 /* arraySize */, &writingPos)) { return false; } - if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount, - probability, &writingPos)) { + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( + parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { return false; } if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, @@ -436,9 +238,10 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; - if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(), - reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability, - &writingPos)) { + const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( + reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, + reallocatingPtNodeParams->getCodePoints(), newProbability)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { return false; } const int actualChildrenPos = writingPos; @@ -450,18 +253,19 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( } // Write the 2nd part of the reallocating node. const int secondPartOfReallocatedPtNodePos = writingPos; - if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams, + const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, - reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, - reallocatingPtNodeParams->getProbability(), &writingPos)) { + reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, + reallocatingPtNodeParams->getProbability())); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) { return false; } if (addsExtraChild) { - if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos, - newNodeCodePoints + overlappingCodePointCount, - newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, - &writingPos)) { + const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( + firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, + newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); + if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) { return false; } } @@ -470,33 +274,28 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( return false; } // Update original reallocating PtNode as moved. - if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, + if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, secondPartOfReallocatedPtNodePos)) { return false; } // Load node info. Information of the 1st part will be fetched. - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); const PtNodeParams ptNodeParams( - nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); + mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); // Update children position. - int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos(); - if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, - actualChildrenPos, &childrenPosFieldPos)) { - return false; - } - return true; + return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); } +// TODO: Make this method version independent. bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount) { - DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); - DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader); + DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); + DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( - headerPolicy, this, mBuffer, mNeedsToDecay); + headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; @@ -521,8 +320,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, // Mapping from positions in mBuffer to positions in bufferToWrite. DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, + &ptNodeReader, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer - traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite, &dictPositionRelocationMap); if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { @@ -539,7 +340,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader); newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields - traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, + traversePolicyToUpdateAllPositionFields(&newDictBigramPolicy, bufferToWrite, &dictPositionRelocationMap); if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( &traversePolicyToUpdateAllPositionFields)) { @@ -551,7 +352,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, } int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, - const int newProbability) { + const int newProbability) const { if (mNeedsToDecay) { return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability); @@ -560,4 +361,28 @@ int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalPr } } +const PtNodeParams DynamicPatriciaTrieWritingHelper::getUpdatedPtNodeParams( + const PtNodeParams *const originalPtNodeParams, const int parentPos, + const int codePointCount, const int *const codePoints, const int probability) const { + const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( + originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), + probability != NOT_A_PROBABILITY /* isTerminal */, + originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, + originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */, + codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); + return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, + probability); +} + +const PtNodeParams DynamicPatriciaTrieWritingHelper::getPtNodeParamsForNewPtNode( + const int parentPos, const int codePointCount, const int *const codePoints, + const int probability) const { + const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( + false /* isBlacklisted */, false /* isNotAWord */, + probability != NOT_A_PROBABILITY /* isTerminal */, + false /* hasShortcutTargets */, false /* hasBigrams */, + codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); + return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h index 5614cb3ac..8135f5054 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h @@ -20,6 +20,7 @@ #include #include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" #include "utils/hash_map_compat.h" namespace latinime { @@ -29,7 +30,8 @@ class DynamicBigramListPolicy; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; class HeaderPolicy; -class PtNodeParams; +class PtNodeReader; +class PtNodeWriter; // TODO: Make it independent from a particular format and move to pt_common. class DynamicPatriciaTrieWritingHelper { @@ -51,9 +53,11 @@ class DynamicPatriciaTrieWritingHelper { static const size_t MAX_DICTIONARY_SIZE; DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, + const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter, DynamicBigramListPolicy *const bigramPolicy, DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) - : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), + : mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), mNeedsToDecay(needsToDecay) {} ~DynamicPatriciaTrieWritingHelper() {} @@ -76,40 +80,18 @@ class DynamicPatriciaTrieWritingHelper { void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, const HeaderPolicy *const headerPolicy); - // CAVEAT: This method must be called only from inner classes of - // DynamicPatriciaTrieGcEventListeners. - bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); - - // CAVEAT: This method must be called only from this class or inner classes of - // DynamicPatriciaTrieGcEventListeners. - bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, - const PtNodeParams *const originalPtNodeParams, const int parentPos, - const int *const codePoints, const int codePointCount, const int probability, - int *const writingPos); - private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); static const int CHILDREN_POSITION_FIELD_SIZE; BufferWithExtendableBuffer *const mBuffer; + const PtNodeReader *const mPtNodeReader; + PtNodeWriter *const mPtNodeWriter; DynamicBigramListPolicy *const mBigramPolicy; DynamicShortcutListPolicy *const mShortcutPolicy; const bool mNeedsToDecay; - bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, - const int movedPos, const int bigramLinkedNodePos); - - bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const bool isBlacklisted, const bool isNotAWord, - const int parentPos, const int *const codePoints, const int codePointCount, - const int probability, const int childrenPos, const int originalBigramListPos, - const int originalShortcutListPos, int *const writingPos); - - bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite, - const int parentPos, const int *const codePoints, const int codePointCount, - const int probability, int *const writingPos); - bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); @@ -131,7 +113,14 @@ class DynamicPatriciaTrieWritingHelper { BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, int *const outBigramCount); - int getUpdatedProbability(const int originalProbability, const int newProbability); + int getUpdatedProbability(const int originalProbability, const int newProbability) const; + + const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, + const int parentPos, const int codePointCount, const int *const codePoints, + const int probability) const; + + const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount, + const int *const codePoints, const int probability) const; }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */