am 0aa32dd3: Merge "Refactoring: PtNodeWriter to separate version specific writing methods."
* commit '0aa32dd30755c5f6849645c64ab87203b5677f0f': Refactoring: PtNodeWriter to separate version specific writing methods.
main
commit
294f525819
|
@ -80,6 +80,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
||||||
dynamic_patricia_trie_gc_event_listeners.cpp \
|
dynamic_patricia_trie_gc_event_listeners.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
|
dynamic_patricia_trie_node_writer.cpp \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
|
|
|
@ -86,6 +86,38 @@ class PtNodeParams {
|
||||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Construct new params by updating existing PtNode params.
|
||||||
|
PtNodeParams(const PtNodeParams *const ptNodeParams,
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
||||||
|
const int codePointCount, const int *const codePoints, const int probability)
|
||||||
|
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mParentPos(parentPos),
|
||||||
|
mCodePointCount(codePointCount), mCodePoints(),
|
||||||
|
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
|
||||||
|
mTerminalId(ptNodeParams->getTerminalId()),
|
||||||
|
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
|
||||||
|
mProbability(probability),
|
||||||
|
mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
|
||||||
|
mChildrenPos(ptNodeParams->getChildrenPos()),
|
||||||
|
mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
|
||||||
|
mShortcutPos(ptNodeParams->getShortcutPos()),
|
||||||
|
mBigramPos(ptNodeParams->getBigramsPos()),
|
||||||
|
mSiblingPos(ptNodeParams->getSiblingNodePos()) {
|
||||||
|
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds params from scratch, without an existing PtNode to copy from.
//
// Only the flags, parent position, code points and probability are supplied. The head position
// and every other position are set to NOT_A_DICT_POS, and the terminal id is set to
// Ver4DictConstants::NOT_A_TERMINAL_ID.
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
        const int codePointCount, const int *const codePoints, const int probability)
        : mHeadPos(NOT_A_DICT_POS),
          mFlags(flags),
          mParentPos(parentPos),
          mCodePointCount(codePointCount),
          mCodePoints(),
          mTerminalIdFieldPos(NOT_A_DICT_POS),
          mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
          mProbabilityFieldPos(NOT_A_DICT_POS),
          mProbability(probability),
          mChildrenPosFieldPos(NOT_A_DICT_POS),
          mChildrenPos(NOT_A_DICT_POS),
          mBigramLinkedNodePos(NOT_A_DICT_POS),
          mShortcutPos(NOT_A_DICT_POS),
          mBigramPos(NOT_A_DICT_POS),
          mSiblingPos(NOT_A_DICT_POS) {
    // mCodePoints was value-initialized above; fill in the first mCodePointCount entries from
    // the caller's array.
    memcpy(mCodePoints, codePoints, mCodePointCount * sizeof(int));
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isValid() const {
|
AK_FORCE_INLINE bool isValid() const {
|
||||||
return mCodePointCount > 0;
|
return mCodePointCount > 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_PT_NODE_WRITER_H
|
||||||
|
#define LATINIME_PT_NODE_WRITER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Interface class used to write PtNode information.
|
||||||
|
class PtNodeWriter {
|
||||||
|
public:
|
||||||
|
virtual ~PtNodeWriter() {}
|
||||||
|
|
||||||
|
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;
|
||||||
|
|
||||||
|
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int movedPos, const int bigramLinkedNodePos) = 0;
|
||||||
|
|
||||||
|
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int probability) = 0;
|
||||||
|
|
||||||
|
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int newChildrenPosition) = 0;
|
||||||
|
|
||||||
|
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||||
|
int *const ptNodeWritingPos) = 0;
|
||||||
|
|
||||||
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
|
bool *const outAddedNewBigram) = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
PtNodeWriter() {};
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(PtNodeWriter);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_PT_NODE_WRITER_H */
|
|
@ -18,6 +18,8 @@
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -54,7 +56,7 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
}
|
}
|
||||||
if (isUselessPtNode) {
|
if (isUselessPtNode) {
|
||||||
// Current PtNode is no longer needed. Mark it as deleted.
|
// Current PtNode is no longer needed. Mark it as deleted.
|
||||||
if (!mWritingHelper->markNodeAsDeleted(ptNodeParams)) {
|
if (!mPtNodeWriter->markPtNodeAsDeleted(ptNodeParams)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -130,9 +132,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo
|
||||||
ptNodeParams->getHeadPos(), writingPos));
|
ptNodeParams->getHeadPos(), writingPos));
|
||||||
mValidPtNodeCount++;
|
mValidPtNodeCount++;
|
||||||
// Writes current PtNode.
|
// Writes current PtNode.
|
||||||
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, ptNodeParams,
|
return mPtNodeWriter->writePtNodeAndAdvancePosition(ptNodeParams, &writingPos);
|
||||||
ptNodeParams->getParentPos(), ptNodeParams->getCodePoints(),
|
|
||||||
ptNodeParams->getCodePointCount(), ptNodeParams->getProbability(), &writingPos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||||
|
|
|
@ -23,13 +23,13 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "utils/hash_map_compat.h"
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class DictionaryHeaderStructurePolicy;
|
class DictionaryHeaderStructurePolicy;
|
||||||
|
class PtNodeWriter;
|
||||||
class PtNodeParams;
|
class PtNodeParams;
|
||||||
|
|
||||||
class DynamicPatriciaTrieGcEventListeners {
|
class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
@ -42,9 +42,9 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
const DictionaryHeaderStructurePolicy *const headerPolicy,
|
const DictionaryHeaderStructurePolicy *const headerPolicy,
|
||||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const buffer,
|
||||||
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
|
const bool isDecayingDict)
|
||||||
: mHeaderPolicy(headerPolicy), mWritingHelper(writingHelper), mBuffer(buffer),
|
: mHeaderPolicy(headerPolicy), mPtNodeWriter(ptNodeWriter), mBuffer(buffer),
|
||||||
mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
|
mIsDecayingDict(isDecayingDict), mValueStack(), mChildrenValue(0),
|
||||||
mValidUnigramCount(0) {}
|
mValidUnigramCount(0) {}
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
|
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
||||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const bool mIsDecayingDict;
|
const bool mIsDecayingDict;
|
||||||
std::vector<int> mValueStack;
|
std::vector<int> mValueStack;
|
||||||
|
@ -118,11 +118,10 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
|
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
|
||||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
PtNodeWriter *const ptNodeWriter, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||||
dictPositionRelocationMap)
|
dictPositionRelocationMap)
|
||||||
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
|
: mPtNodeWriter(ptNodeWriter), mBufferToWrite(bufferToWrite),
|
||||||
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
|
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
|
||||||
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
|
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
|
||||||
|
|
||||||
|
@ -137,7 +136,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
BufferWithExtendableBuffer *const mBufferToWrite;
|
BufferWithExtendableBuffer *const mBufferToWrite;
|
||||||
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||||
mDictPositionRelocationMap;
|
mDictPositionRelocationMap;
|
||||||
|
@ -149,13 +148,11 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToUpdateAllPositionFields(
|
TraversePolicyToUpdateAllPositionFields(
|
||||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
BufferWithExtendableBuffer *const bufferToWrite,
|
BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||||
dictPositionRelocationMap)
|
dictPositionRelocationMap)
|
||||||
: mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
|
: mBigramPolicy(bigramPolicy), mBufferToWrite(bufferToWrite),
|
||||||
mBufferToWrite(bufferToWrite),
|
|
||||||
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
|
mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
|
||||||
mBigramCount(0) {};
|
mBigramCount(0) {};
|
||||||
|
|
||||||
|
@ -178,7 +175,6 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
BufferWithExtendableBuffer *const mBufferToWrite;
|
BufferWithExtendableBuffer *const mBufferToWrite;
|
||||||
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||||
|
|
|
@ -0,0 +1,227 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Children position field size handed to PatriciaTrieReadingUtils::createAndGetFlags() whenever
// this writer builds PtNode flags.
// NOTE(review): presumably a size in bytes -- confirm against createAndGetFlags().
const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||||
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||||
|
true /* isDeleted */);
|
||||||
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
// Update flags.
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int movedPos, const int bigramLinkedNodePos) {
|
||||||
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||||
|
false /* isDeleted */);
|
||||||
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
// Update flags.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update moved position, which is stored in the parent offset field.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
|
mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update bigram linked node position, which is stored in the children position field.
|
||||||
|
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
|
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
||||||
|
// Update children's parent position.
|
||||||
|
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
||||||
|
while (!mReadingHelper.isEnd()) {
|
||||||
|
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
|
||||||
|
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
||||||
|
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
|
mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
||||||
|
&parentOffsetFieldPos)) {
|
||||||
|
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||||
|
// we give up to update dictionary.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mReadingHelper.readNextSiblingNode(childPtNodeParams);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
|
||||||
|
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
|
newProbability, &probabilityFieldPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
|
||||||
|
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
|
newChildrenPosition, &childrenPosFieldPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||||
|
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||||
|
const int nodePos = *ptNodeWritingPos;
|
||||||
|
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||||
|
// PtNode writing.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
|
||||||
|
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Calculate a parent offset and write the offset.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
||||||
|
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write code points
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
|
||||||
|
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write probability when the probability is a valid probability, which means this node is
|
||||||
|
// terminal.
|
||||||
|
if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
|
ptNodeParams->getProbability(), ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write children position
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||||
|
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
||||||
|
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
|
||||||
|
int fromPos = ptNodeParams->getShortcutPos();
|
||||||
|
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
|
||||||
|
ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||||
|
int bigramCount = 0;
|
||||||
|
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
|
||||||
|
int fromPos = ptNodeParams->getBigramsPos();
|
||||||
|
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create node flags and write them.
|
||||||
|
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||||
|
ptNodeParams->isNotAWord(),
|
||||||
|
ptNodeParams->getProbability() != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||||
|
bigramCount > 0 /* hasBigrams */,
|
||||||
|
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||||
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
int flagsFieldPos = nodePos;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||||
|
&flagsFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
|
const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
|
bool *const outAddedNewBigram) {
|
||||||
|
const int newNodePos = mBuffer->getTailPosition();
|
||||||
|
int writingPos = newNodePos;
|
||||||
|
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
||||||
|
if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const PtNodeParams newPtNodeParams(
|
||||||
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos));
|
||||||
|
if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
|
||||||
|
// Insert a new bigram entry into the existing bigram list.
|
||||||
|
int bigramListPos = newPtNodeParams.getBigramsPos();
|
||||||
|
return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(),
|
||||||
|
probability, &bigramListPos, outAddedNewBigram);
|
||||||
|
} else {
|
||||||
|
// The PtNode doesn't have a bigram list.
|
||||||
|
*outAddedNewBigram = true;
|
||||||
|
// First, Write a bigram entry at the tail position of the PtNode.
|
||||||
|
if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Then, Mark as the PtNode having bigram list in the flags.
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
|
||||||
|
newPtNodeParams.isNotAWord(),
|
||||||
|
newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
|
||||||
|
newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
||||||
|
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
writingPos = newNodePos;
|
||||||
|
// Write updated flags into the moved PtNode's flags field.
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
||||||
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class DynamicBigramListPolicy;
|
||||||
|
class DynamicShortcutListPolicy;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This class is used for helping to writes nodes of dynamic patricia trie.
|
||||||
|
*/
|
||||||
|
class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
|
public:
|
||||||
|
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
|
||||||
|
const DynamicPatriciaTrieNodeReader *const ptNodeReader,
|
||||||
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
|
DynamicShortcutListPolicy *const shortcutPolicy)
|
||||||
|
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
|
||||||
|
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||||
|
|
||||||
|
virtual ~DynamicPatriciaTrieNodeWriter() {}
|
||||||
|
|
||||||
|
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||||
|
|
||||||
|
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
|
||||||
|
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int newProbability);
|
||||||
|
|
||||||
|
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int newChildrenPosition);
|
||||||
|
|
||||||
|
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||||
|
int *const ptNodeWritingPos);
|
||||||
|
|
||||||
|
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
|
bool *const outAddedNewBigram);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
||||||
|
|
||||||
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
|
|
||||||
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
const DynamicPatriciaTrieNodeReader *const mPtNodeReader;
|
||||||
|
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
||||||
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */
|
|
@ -152,8 +152,8 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
||||||
&addedNewUnigram)) {
|
&addedNewUnigram)) {
|
||||||
|
@ -187,8 +187,8 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
bool addedNewBigram = false;
|
bool addedNewBigram = false;
|
||||||
if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
||||||
if (addedNewBigram) {
|
if (addedNewBigram) {
|
||||||
|
@ -221,8 +221,8 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||||
mBigramCount--;
|
mBigramCount--;
|
||||||
return true;
|
return true;
|
||||||
|
@ -236,8 +236,8 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
&mNodeWriter, &mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
||||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,8 +251,8 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
||||||
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
||||||
&mShortcutListPolicy, needsToDecay);
|
&mShortcutListPolicy, needsToDecay);
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mNodeReader,
|
||||||
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
&mNodeWriter, &bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
||||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||||
mNeedsToDecayForTesting = false;
|
mNeedsToDecayForTesting = false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
@ -46,6 +47,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
|
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
||||||
|
&mShortcutListPolicy),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||||
|
|
||||||
|
@ -117,6 +120,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
|
DynamicPatriciaTrieNodeWriter mNodeWriter;
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
int mNeedsToDecayForTesting;
|
int mNeedsToDecayForTesting;
|
||||||
|
|
|
@ -17,9 +17,12 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
@ -96,51 +99,17 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
const int probability, bool *const outAddedNewBigram) {
|
const int probability, bool *const outAddedNewBigram) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
const PtNodeParams sourcePtNodeParams(
|
||||||
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
// Move node to add bigram entry.
|
const PtNodeParams targetPtNodeParams(
|
||||||
const int newNodePos = mBuffer->getTailPosition();
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word1Pos));
|
||||||
if (!markNodeAsMovedAndSetPosition(&ptNodeParams, newNodePos, newNodePos)) {
|
return mPtNodeWriter->addNewBigramEntry(&sourcePtNodeParams, &targetPtNodeParams, probability,
|
||||||
return false;
|
outAddedNewBigram);
|
||||||
}
|
|
||||||
int writingPos = newNodePos;
|
|
||||||
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &ptNodeParams, ptNodeParams.getParentPos(),
|
|
||||||
ptNodeParams.getCodePoints(), ptNodeParams.getCodePointCount(),
|
|
||||||
ptNodeParams.getProbability(), &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const PtNodeParams newPtNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos));
|
|
||||||
if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
|
|
||||||
// Insert a new bigram entry into the existing bigram list.
|
|
||||||
int bigramListPos = newPtNodeParams.getBigramsPos();
|
|
||||||
return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
|
|
||||||
outAddedNewBigram);
|
|
||||||
} else {
|
|
||||||
// The PtNode doesn't have a bigram list.
|
|
||||||
*outAddedNewBigram = true;
|
|
||||||
// First, Write a bigram entry at the tail position of the PtNode.
|
|
||||||
if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Then, Mark as the PtNode having bigram list in the flags.
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
|
|
||||||
newPtNodeParams.isNotAWord(),
|
|
||||||
newPtNodeParams.getProbability() != NOT_A_PROBABILITY,
|
|
||||||
newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
|
||||||
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
|
||||||
writingPos = newNodePos;
|
|
||||||
// Write updated flags into the moved PtNode's flags field.
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove a bigram relation from word0Pos to word1Pos.
|
// Remove a bigram relation from word0Pos to word1Pos.
|
||||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
const PtNodeParams ptNodeParams(mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
||||||
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos));
|
|
||||||
if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
|
if (ptNodeParams.getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -180,169 +149,6 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNod
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
|
||||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
// Read original flags
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
|
||||||
true /* isDeleted */);
|
|
||||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
// Update flags.
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int movedPos,
|
|
||||||
const int bigramLinkedNodePos) {
|
|
||||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
// Read original flags
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
|
||||||
false /* isDeleted */);
|
|
||||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
// Update flags.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update moved position, which is stored in the parent offset field.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
|
||||||
mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update bigram linked node position, which is stored in the children position field.
|
|
||||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
|
||||||
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
|
||||||
// Update children's parent position.
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
|
|
||||||
readingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
|
||||||
while (!readingHelper.isEnd()) {
|
|
||||||
const PtNodeParams childPtNodeParams(readingHelper.getPtNodeParams());
|
|
||||||
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
|
||||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
|
||||||
mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
|
||||||
&parentOffsetFieldPos)) {
|
|
||||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
|
||||||
// we give up to update dictionary.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
readingHelper.readNextSiblingNode(childPtNodeParams);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write new PtNode at writingPos.
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
|
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
|
|
||||||
const bool isNotAWord, const int parentPos, const int *const codePoints,
|
|
||||||
const int codePointCount, const int probability, const int childrenPos,
|
|
||||||
const int originalBigramListPos, const int originalShortcutListPos,
|
|
||||||
int *const writingPos) {
|
|
||||||
const int nodePos = *writingPos;
|
|
||||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
|
||||||
// PtNode writing.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
|
|
||||||
0 /* nodeFlags */, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Calculate a parent offset and write the offset.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
|
|
||||||
parentPos, nodePos, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write code points
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
|
|
||||||
codePoints, codePointCount, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write probability when the probability is a valid probability, which means this node is
|
|
||||||
// terminal.
|
|
||||||
if (probability != NOT_A_PROBABILITY) {
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
|
|
||||||
probability, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Write children position
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
|
|
||||||
childrenPos, writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
|
||||||
if (originalShortcutListPos != NOT_A_DICT_POS) {
|
|
||||||
int fromPos = originalShortcutListPos;
|
|
||||||
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
|
|
||||||
writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
|
||||||
int bigramCount = 0;
|
|
||||||
if (originalBigramListPos != NOT_A_DICT_POS) {
|
|
||||||
int fromPos = originalBigramListPos;
|
|
||||||
if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Create node flags and write them.
|
|
||||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
|
||||||
probability != NOT_A_PROBABILITY /* isTerminal */,
|
|
||||||
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
|
|
||||||
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
|
||||||
CHILDREN_POSITION_FIELD_SIZE);
|
|
||||||
int flagsFieldPos = nodePos;
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
|
|
||||||
&flagsFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
|
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
|
|
||||||
const int *const codePoints, const int codePointCount, const int probability,
|
|
||||||
int *const writingPos) {
|
|
||||||
return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
|
|
||||||
false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
|
|
||||||
NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
|
|
||||||
NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
|
|
||||||
BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
|
||||||
const int *const codePoints, const int codePointCount, const int probability,
|
|
||||||
int *const writingPos) {
|
|
||||||
return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalPtNodeParams->isBlacklisted(),
|
|
||||||
originalPtNodeParams->isNotAWord(), parentPos, codePoints, codePointCount, probability,
|
|
||||||
originalPtNodeParams->getChildrenPos(), originalPtNodeParams->getBigramsPos(),
|
|
||||||
originalPtNodeParams->getShortcutPos(), writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
|
||||||
int *const forwardLinkFieldPos) {
|
int *const forwardLinkFieldPos) {
|
||||||
|
@ -363,23 +169,20 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
*outAddedNewUnigram = false;
|
*outAddedNewUnigram = false;
|
||||||
const int probabilityToWrite = getUpdatedProbability(
|
const int probabilityToWrite = getUpdatedProbability(
|
||||||
originalPtNodeParams->getProbability(), probability);
|
originalPtNodeParams->getProbability(), probability);
|
||||||
int probabilityFieldPos = originalPtNodeParams->getProbabilityFieldPos();
|
return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probabilityToWrite);
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
|
||||||
probabilityToWrite, &probabilityFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Make the node terminal and write the probability.
|
// Make the node terminal and write the probability.
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
int movedPos = mBuffer->getTailPosition();
|
const int movedPos = mBuffer->getTailPosition();
|
||||||
if (!markNodeAsMovedAndSetPosition(originalPtNodeParams, movedPos, movedPos)) {
|
int writingPos = movedPos;
|
||||||
|
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||||
|
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
|
||||||
|
originalPtNodeParams->getCodePoints(),
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability)));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNodeParams,
|
if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
|
||||||
originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePoints(),
|
|
||||||
originalPtNodeParams->getCodePointCount(),
|
|
||||||
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
|
||||||
&movedPos)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -390,9 +193,7 @@ bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
const PtNodeParams *const parentPtNodeParams, const int probability,
|
const PtNodeParams *const parentPtNodeParams, const int probability,
|
||||||
const int *const codePoints, const int codePointCount) {
|
const int *const codePoints, const int codePointCount) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
int childrenPosFieldPos = parentPtNodeParams->getChildrenPosFieldPos();
|
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
|
||||||
newPtNodeArrayPos, &childrenPosFieldPos)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
|
@ -407,8 +208,9 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
1 /* arraySize */, &writingPos)) {
|
1 /* arraySize */, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
probability, &writingPos)) {
|
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
|
@ -436,9 +238,10 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
// with actual children position.
|
// with actual children position.
|
||||||
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
|
||||||
if (!writePtNodeToBuffer(mBuffer, reallocatingPtNodeParams->getParentPos(),
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
reallocatingPtNodeParams->getCodePoints(), overlappingCodePointCount, newProbability,
|
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
||||||
&writingPos)) {
|
reallocatingPtNodeParams->getCodePoints(), newProbability));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int actualChildrenPos = writingPos;
|
const int actualChildrenPos = writingPos;
|
||||||
|
@ -450,18 +253,19 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
// Write the 2nd part of the reallocating node.
|
// Write the 2nd part of the reallocating node.
|
||||||
const int secondPartOfReallocatedPtNodePos = writingPos;
|
const int secondPartOfReallocatedPtNodePos = writingPos;
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNodeParams,
|
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||||
firstPartOfReallocatedPtNodePos,
|
firstPartOfReallocatedPtNodePos,
|
||||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
|
||||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
||||||
reallocatingPtNodeParams->getProbability(), &writingPos)) {
|
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
||||||
|
reallocatingPtNodeParams->getProbability()));
|
||||||
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
|
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||||
newNodeCodePoints + overlappingCodePointCount,
|
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
|
||||||
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
|
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
|
||||||
&writingPos)) {
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&extraChildPtNodeParams, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -470,33 +274,28 @@ bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Update original reallocating PtNode as moved.
|
// Update original reallocating PtNode as moved.
|
||||||
if (!markNodeAsMovedAndSetPosition(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
|
if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos,
|
||||||
secondPartOfReallocatedPtNodePos)) {
|
secondPartOfReallocatedPtNodePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Load node info. Information of the 1st part will be fetched.
|
// Load node info. Information of the 1st part will be fetched.
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
|
||||||
const PtNodeParams ptNodeParams(
|
const PtNodeParams ptNodeParams(
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
|
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos));
|
||||||
// Update children position.
|
// Update children position.
|
||||||
int childrenPosFieldPos = ptNodeParams.getChildrenPosFieldPos();
|
return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos);
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
|
||||||
actualChildrenPos, &childrenPosFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Make this method version independent.
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
int *const outUnigramCount, int *const outBigramCount) {
|
int *const outUnigramCount, int *const outBigramCount) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &nodeReader);
|
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
headerPolicy, this, mBuffer, mNeedsToDecay);
|
headerPolicy, mPtNodeWriter, mBuffer, mNeedsToDecay);
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -521,8 +320,10 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||||
DictPositionRelocationMap dictPositionRelocationMap;
|
DictPositionRelocationMap dictPositionRelocationMap;
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
|
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite,
|
||||||
|
&ptNodeReader, mBigramPolicy, mShortcutPolicy);
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
|
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
|
||||||
&dictPositionRelocationMap);
|
&dictPositionRelocationMap);
|
||||||
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||||
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
||||||
|
@ -539,7 +340,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
||||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||||
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
|
traversePolicyToUpdateAllPositionFields(&newDictBigramPolicy, bufferToWrite,
|
||||||
&dictPositionRelocationMap);
|
&dictPositionRelocationMap);
|
||||||
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||||
&traversePolicyToUpdateAllPositionFields)) {
|
&traversePolicyToUpdateAllPositionFields)) {
|
||||||
|
@ -551,7 +352,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
||||||
const int newProbability) {
|
const int newProbability) const {
|
||||||
if (mNeedsToDecay) {
|
if (mNeedsToDecay) {
|
||||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||||
newProbability);
|
newProbability);
|
||||||
|
@ -560,4 +361,28 @@ int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalPr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the PtNodeParams for an updated version of an existing PtNode.
//
// The node flags are recomputed here rather than copied: the blacklisted / not-a-word bits and
// the presence of shortcut targets and bigrams are taken from originalPtNodeParams, while the
// terminal bit and the multiple-chars bit are derived from the new probability and
// codePointCount passed by the caller.
//
// originalPtNodeParams is dereferenced unconditionally, so it must not be null.
// Returns the new params by value; nothing is written to any buffer by this method.
const PtNodeParams DynamicPatriciaTrieWritingHelper::getUpdatedPtNodeParams(
|
||||||
|
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
||||||
|
const int codePointCount, const int *const codePoints, const int probability) const {
|
||||||
|
// A node is terminal iff a real probability is given; shortcut/bigram flags are set iff the
// original node has a valid position for the corresponding list.
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
|
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
|
||||||
|
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||||
|
originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */,
|
||||||
|
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
// Uses the PtNodeParams constructor that updates existing params: fields not passed here are
// taken from originalPtNodeParams by that constructor.
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
||||||
|
probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the PtNodeParams for a brand-new PtNode that has no predecessor to copy from.
//
// The flags describe a node that is neither blacklisted nor not-a-word and that has no shortcut
// targets or bigrams. It is terminal iff probability is not NOT_A_PROBABILITY, and it is flagged
// as having multiple chars iff codePointCount > 1.
//
// Returns the new params by value; nothing is written to any buffer by this method.
const PtNodeParams DynamicPatriciaTrieWritingHelper::getPtNodeParamsForNewPtNode(
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probability) const {
|
||||||
|
// Only the terminal and multiple-chars bits depend on the arguments; the rest are fixed false.
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
|
false /* isBlacklisted */, false /* isNotAWord */,
|
||||||
|
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
false /* hasShortcutTargets */, false /* hasBigrams */,
|
||||||
|
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
#include "utils/hash_map_compat.h"
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -29,7 +30,8 @@ class DynamicBigramListPolicy;
|
||||||
class DynamicPatriciaTrieReadingHelper;
|
class DynamicPatriciaTrieReadingHelper;
|
||||||
class DynamicShortcutListPolicy;
|
class DynamicShortcutListPolicy;
|
||||||
class HeaderPolicy;
|
class HeaderPolicy;
|
||||||
class PtNodeParams;
|
class PtNodeReader;
|
||||||
|
class PtNodeWriter;
|
||||||
|
|
||||||
// TODO: Make it independent from a particular format and move to pt_common.
|
// TODO: Make it independent from a particular format and move to pt_common.
|
||||||
class DynamicPatriciaTrieWritingHelper {
|
class DynamicPatriciaTrieWritingHelper {
|
||||||
|
@ -51,9 +53,11 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
static const size_t MAX_DICTIONARY_SIZE;
|
static const size_t MAX_DICTIONARY_SIZE;
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||||
|
const PtNodeReader *const ptNodeReader, PtNodeWriter *const ptNodeWriter,
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
||||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mPtNodeWriter(ptNodeWriter),
|
||||||
|
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||||
mNeedsToDecay(needsToDecay) {}
|
mNeedsToDecay(needsToDecay) {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieWritingHelper() {}
|
~DynamicPatriciaTrieWritingHelper() {}
|
||||||
|
@ -76,40 +80,18 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
||||||
const HeaderPolicy *const headerPolicy);
|
const HeaderPolicy *const headerPolicy);
|
||||||
|
|
||||||
// CAVEAT: This method must be called only from inner classes of
|
|
||||||
// DynamicPatriciaTrieGcEventListeners.
|
|
||||||
bool markNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
|
||||||
|
|
||||||
// CAVEAT: This method must be called only from this class or inner classes of
|
|
||||||
// DynamicPatriciaTrieGcEventListeners.
|
|
||||||
bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
const PtNodeParams *const originalPtNodeParams, const int parentPos,
|
|
||||||
const int *const codePoints, const int codePointCount, const int probability,
|
|
||||||
int *const writingPos);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
const PtNodeReader *const mPtNodeReader;
|
||||||
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
const bool mNeedsToDecay;
|
const bool mNeedsToDecay;
|
||||||
|
|
||||||
bool markNodeAsMovedAndSetPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const int movedPos, const int bigramLinkedNodePos);
|
|
||||||
|
|
||||||
bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
const bool isBlacklisted, const bool isNotAWord,
|
|
||||||
const int parentPos, const int *const codePoints, const int codePointCount,
|
|
||||||
const int probability, const int childrenPos, const int originalBigramListPos,
|
|
||||||
const int originalShortcutListPos, int *const writingPos);
|
|
||||||
|
|
||||||
bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
const int parentPos, const int *const codePoints, const int codePointCount,
|
|
||||||
const int probability, int *const writingPos);
|
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
||||||
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
|
@ -131,7 +113,14 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
||||||
int *const outBigramCount);
|
int *const outBigramCount);
|
||||||
|
|
||||||
int getUpdatedProbability(const int originalProbability, const int newProbability);
|
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||||
|
|
||||||
|
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||||
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
const int probability) const;
|
||||||
|
|
||||||
|
const PtNodeParams getPtNodeParamsForNewPtNode(const int parentPos, const int codePointCount,
|
||||||
|
const int *const codePoints, const int probability) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
||||||
|
|
Loading…
Reference in New Issue