am e717f646
: Purge Ver3 dictionary writing in native code.
* commit 'e717f64659c3d8cc9bfa2b507cee11df66ad6fbb': Purge Ver3 dictionary writing in native code.
This commit is contained in:
commit
149781d770
15 changed files with 37 additions and 1272 deletions
|
@ -82,12 +82,10 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
||||
dynamic_patricia_trie_gc_event_listeners.cpp \
|
||||
dynamic_patricia_trie_node_reader.cpp \
|
||||
dynamic_patricia_trie_node_writer.cpp \
|
||||
dynamic_patricia_trie_policy.cpp \
|
||||
dynamic_patricia_trie_reading_helper.cpp \
|
||||
dynamic_patricia_trie_reading_utils.cpp \
|
||||
dynamic_patricia_trie_updating_helper.cpp \
|
||||
dynamic_patricia_trie_writing_helper.cpp \
|
||||
dynamic_patricia_trie_writing_utils.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||
ver4_dict_buffers.cpp \
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
|
@ -66,278 +65,6 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
|
|||
}
|
||||
}
|
||||
|
||||
// Copies the bigram list that starts at *fromPos in mBuffer to *toPos in bufferToWrite.
// Entries whose stored target is NOT_A_DICT_POS, or whose target can no longer be resolved to a
// current PtNode, are not copied. *outBigramsCount is reset to 0 and then receives the number
// of entries written. Returns false when the list exceeds the entry limit or a write fails.
bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
        int *const fromPos, int *const toPos, int *const outBigramsCount) const {
    const bool readsAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
    if (readsAdditionalBuffer) {
        // Reading is done with a position relative to the head of the additional buffer.
        *fromPos -= mBuffer->getOriginalBufferSize();
    }
    *outBigramsCount = 0;
    BigramListReadWriteUtils::BigramFlags flags;
    int visitedEntryCount = 0;
    int posOfLastCopiedEntry = NOT_A_DICT_POS;
    do {
        ++visitedEntryCount;
        if (visitedEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
            AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
                    visitedEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
            ASSERT(false);
            return false;
        }
        // The buffer is re-fetched on every iteration because its address can change after
        // calling buffer writing methods.
        int storedTargetPos;
        BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
                mBuffer->getBuffer(readsAdditionalBuffer), &flags, &storedTargetPos, fromPos);
        if (storedTargetPos != NOT_A_DICT_POS) {
            if (readsAdditionalBuffer) {
                storedTargetPos += mBuffer->getOriginalBufferSize();
            }
            const int currentTargetPos =
                    followBigramLinkAndGetCurrentBigramPtNodePos(storedTargetPos);
            // A NOT_A_DICT_POS result means the target PtNode has been invalidated.
            if (currentTargetPos != NOT_A_DICT_POS) {
                posOfLastCopiedEntry = *toPos;
                const bool written = BigramListReadWriteUtils::createAndWriteBigramEntry(
                        bufferToWrite, currentTargetPos,
                        BigramListReadWriteUtils::getProbabilityFromFlags(flags),
                        BigramListReadWriteUtils::hasNext(flags), toPos);
                if (!written) {
                    return false;
                }
                (*outBigramsCount)++;
            }
        }
    } while (BigramListReadWriteUtils::hasNext(flags));
    // The last copied entry must terminate the new list, even if entries after it in the
    // source list were skipped.
    if (posOfLastCopiedEntry != NOT_A_DICT_POS
            && !BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
                    posOfLastCopiedEntry)) {
        return false;
    }
    if (readsAdditionalBuffer) {
        // Convert the reading position back to a whole-dictionary position.
        *fromPos += mBuffer->getOriginalBufferSize();
    }
    return true;
}
|
||||
|
||||
// Finds useless bigram entries and removes them. A bigram entry is useless when the target PtNode
|
||||
// has been deleted or is not a valid terminal.
|
||||
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
int *const bigramListPos, int *const outValidBigramEntryCount) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = *bigramListPos;
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||
bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||
// This entry has already been removed.
|
||||
continue;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int bigramTargetNodePos =
|
||||
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
|
||||
bigramTargetNodePos));
|
||||
if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|
||||
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
||||
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
bool isRemoved = false;
|
||||
if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
|
||||
&isRemoved)) {
|
||||
return false;
|
||||
}
|
||||
if (!isRemoved) {
|
||||
(*outValidBigramEntryCount) += 1;
|
||||
}
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Updates bigram target PtNode positions in the list after the placing step in GC.
|
||||
bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
||||
const PtNodeWriter::PtNodePositionRelocationMap *const ptNodePositionRelocationMap,
|
||||
int *const outBigramEntryCount) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = *bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
int bigramTargetPtNodePos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
|
||||
bigramListPos);
|
||||
if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
|
||||
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
|
||||
ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
|
||||
if (it != ptNodePositionRelocationMap->end()) {
|
||||
bigramTargetPtNodePos = it->second;
|
||||
} else {
|
||||
bigramTargetPtNodePos = NOT_A_DICT_POS;
|
||||
}
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
bigramTargetPtNodePos, &bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
(*outBigramEntryCount) = bigramEntryCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
|
||||
const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int entryPos = *bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
entryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
||||
bigramListPos);
|
||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
||||
// Update this bigram entry.
|
||||
*outAddedNewBigram = false;
|
||||
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
|
||||
bigramFlags);
|
||||
const int probabilityToWrite = mIsDecayingDict ?
|
||||
ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
probability) : probability;
|
||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
|
||||
probabilityToWrite);
|
||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
||||
originalBigramPos, &entryPos);
|
||||
}
|
||||
if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
|
||||
continue;
|
||||
}
|
||||
// The current last entry is found.
|
||||
// First, update the flags of the last entry.
|
||||
if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
|
||||
*outAddedNewBigram = false;
|
||||
return false;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Then, add a new entry after the last entry.
|
||||
*outAddedNewBigram = true;
|
||||
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
// We return directly from the while loop.
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||
int *const writingPos) {
|
||||
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
||||
const int probabilityToWrite = mIsDecayingDict ?
|
||||
ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
|
||||
probability;
|
||||
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
||||
probabilityToWrite, false /* hasNext */, writingPos);
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
||||
int pos = bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = pos;
|
||||
int originalBigramPos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||
if (bigramPos != bigramTargetPos) {
|
||||
continue;
|
||||
}
|
||||
// Target entry is found. Write an invalid target position to mark the bigram invalid.
|
||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
return false;
|
||||
}
|
||||
|
||||
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||
const int originalBigramPos) const {
|
||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||
|
@ -362,33 +89,4 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
|||
return currentPos;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::updateProbabilityForDecay(
|
||||
const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
|
||||
int *const bigramEntryPos, bool *const outRemoved) const {
|
||||
*outRemoved = false;
|
||||
if (mIsDecayingDict) {
|
||||
// Update bigram probability for decaying.
|
||||
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
|
||||
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
||||
// Write new probability.
|
||||
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(
|
||||
bigramFlags, newProbability);
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags,
|
||||
targetPtNodePos, bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Remove current bigram entry.
|
||||
*outRemoved = true;
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -50,28 +49,6 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
|
||||
void skipAllBigrams(int *const bigramListPos) const;
|
||||
|
||||
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
|
||||
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
|
||||
// bigram entries and writes the valid bigram entry count to outBigramsCount.
|
||||
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
|
||||
int *const toPos, int *const outBigramsCount) const;
|
||||
|
||||
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
|
||||
int *const outBigramEntryCount);
|
||||
|
||||
bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
||||
const PtNodeWriter::PtNodePositionRelocationMap *const
|
||||
ptNodePositionRelocationMap, int *const outValidBigramEntryCount);
|
||||
|
||||
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
|
||||
int *const bigramListPos, bool *const outAddedNewBigram);
|
||||
|
||||
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||
int *const writingPos);
|
||||
|
||||
// Returns whether or not an entry targeting bigramTargetPos is found.
|
||||
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||
|
||||
|
@ -85,9 +62,6 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
|
||||
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
||||
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
||||
|
||||
bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
|
||||
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||
|
|
|
@ -1,338 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Read original flags
|
||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||
true /* isDeleted */);
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
// Update flags.
|
||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||
&writingPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int movedPos, const int bigramLinkedNodePos) {
|
||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Read original flags
|
||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||
false /* isDeleted */);
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
// Update flags.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Update moved position, which is stored in the parent offset field.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||
mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Update bigram linked node position, which is stored in the children position field.
|
||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
||||
// Update children's parent position.
|
||||
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
||||
while (!mReadingHelper.isEnd()) {
|
||||
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
|
||||
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||
mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
||||
&parentOffsetFieldPos)) {
|
||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||
// we give up to update dictionary.
|
||||
return false;
|
||||
}
|
||||
mReadingHelper.readNextSiblingNode(childPtNodeParams);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
|
||||
const int timestamp) {
|
||||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
return false;
|
||||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
|
||||
newProbability);
|
||||
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
|
||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
probabilityToWrite, &probabilityFieldPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
|
||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||
return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||
newChildrenPosition, &childrenPosFieldPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams,
|
||||
0 /* outProbabilityFieldPos */, ptNodeWritingPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
|
||||
int probabilityFieldPos = NOT_A_DICT_POS;
|
||||
if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (probabilityFieldPos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(
|
||||
NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability());
|
||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
probabilityToWrite, &probabilityFieldPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
||||
bool *const outAddedNewBigram) {
|
||||
const int newNodePos = mBuffer->getTailPosition();
|
||||
int writingPos = newNodePos;
|
||||
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
||||
if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) {
|
||||
return false;
|
||||
}
|
||||
const PtNodeParams newPtNodeParams(
|
||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos));
|
||||
if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
|
||||
// Insert a new bigram entry into the existing bigram list.
|
||||
int bigramListPos = newPtNodeParams.getBigramsPos();
|
||||
return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(),
|
||||
probability, &bigramListPos, outAddedNewBigram);
|
||||
} else {
|
||||
// The PtNode doesn't have a bigram list.
|
||||
*outAddedNewBigram = true;
|
||||
// First, Write a bigram entry at the tail position of the PtNode.
|
||||
if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Then, Mark as the PtNode having bigram list in the flags.
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
|
||||
newPtNodeParams.isNotAWord(), newPtNodeParams.isTerminal(),
|
||||
newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
||||
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
||||
writingPos = newNodePos;
|
||||
// Write updated flags into the moved PtNode's flags field.
|
||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
||||
&writingPos);
|
||||
}
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::removeBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||
if (sourcePtNodeParams->getBigramsPos() == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
return mBigramPolicy->removeBigram(sourcePtNodeParams->getBigramsPos(),
|
||||
targetPtNodeParam->getHeadPos());
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
|
||||
int bigramListPos = sourcePtNodeParams->getBigramsPos();
|
||||
return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&bigramListPos,
|
||||
outBigramEntryCount);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
||||
int *const outBigramEntryCount) {
|
||||
int parentPos = toBeUpdatedPtNodeParams->getParentPos();
|
||||
if (parentPos != NOT_A_DICT_POS) {
|
||||
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
|
||||
dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
||||
if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
|
||||
parentPos = it->second;
|
||||
}
|
||||
}
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
|
||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
// Write an updated offset to the parent PtNode.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
||||
parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Updates children position that is a PtNodeArray position.
|
||||
int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
|
||||
if (childrenPos != NOT_A_DICT_POS) {
|
||||
PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
|
||||
dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
||||
if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
|
||||
childrenPos = it->second;
|
||||
}
|
||||
}
|
||||
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
|
||||
return false;
|
||||
}
|
||||
// Updates bigram target PtNode positions in the bigram list.
|
||||
int bigramsPos = toBeUpdatedPtNodeParams->getBigramsPos();
|
||||
int bigramCount = 0;
|
||||
if (bigramsPos != NOT_A_DICT_POS) {
|
||||
int bigramEntryCount;
|
||||
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
|
||||
&dictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
|
||||
return false;
|
||||
}
|
||||
bigramCount += bigramEntryCount;
|
||||
}
|
||||
if (outBigramEntryCount) {
|
||||
*outBigramEntryCount = bigramCount;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
||||
int *const ptNodeWritingPos) {
|
||||
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability when the probability is a valid probability, which means this node is
|
||||
// terminal.
|
||||
if (ptNodeParams->isTerminal()) {
|
||||
if (outProbabilityFieldPos) {
|
||||
*outProbabilityFieldPos = *ptNodeWritingPos;
|
||||
}
|
||||
if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) {
|
||||
// Write a dummy probability.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
0 /* probability */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getProbability(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
||||
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getShortcutPos();
|
||||
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||
int bigramCount = 0;
|
||||
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
|
||||
int fromPos = ptNodeParams->getBigramsPos();
|
||||
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(),
|
||||
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||
bigramCount > 0 /* hasBigrams */,
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) const {
|
||||
if (mNeedsToDecayWhenUpdating) {
|
||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
newProbability);
|
||||
} else {
|
||||
return newProbability;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,99 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
class DynamicBigramListPolicy;
|
||||
class DynamicShortcutListPolicy;
|
||||
|
||||
/*
|
||||
* Helper class that writes nodes (PtNodes) of a dynamic patricia trie into a
* BufferWithExtendableBuffer. It derives from PtNodeWriter and relies on a node reader, a
* bigram list policy and a shortcut list policy supplied by the caller.
|
||||
*/
|
||||
class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
||||
public:
|
||||
// Stores the given collaborators; none of them is owned (raw pointers are kept as is).
// needsToDecayWhenUpdating selects the behavior of getUpdatedProbability().
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
|
||||
const DynamicPatriciaTrieNodeReader *const ptNodeReader,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating)
|
||||
// Note: mReadingHelper is built from the member mBuffer, so mBuffer must stay declared
// before mReadingHelper in the member list below.
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
|
||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
|
||||
|
||||
virtual ~DynamicPatriciaTrieNodeWriter() {}
|
||||
|
||||
// The virtual methods below are only declared here; their definitions are not part of this
// header. NOTE(review): presumably they implement the PtNodeWriter interface -- confirm
// against pt_node_writer.h.
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||
|
||||
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int movedPos, const int bigramLinkedNodePos);
|
||||
|
||||
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int newProbability, const int timestamp);
|
||||
|
||||
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int newChildrenPosition);
|
||||
|
||||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
const int timestamp, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
||||
bool *const outAddedNewBigram);
|
||||
|
||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam);
|
||||
|
||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
|
||||
|
||||
virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
||||
int *const outBigramEntryCount);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
||||
|
||||
// Writes one PtNode (flags, parent offset, code points, probability for terminals, children
// position, shortcut list, bigram list) at *ptNodeWritingPos and advances the position.
// For terminal PtNodes the probability field position is reported through
// outProbabilityFieldPos when that pointer is non-null.
bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
// Returns newProbability unchanged unless mNeedsToDecayWhenUpdating is set, in which case
// the value comes from ForgettingCurveUtils::getUpdatedEncodedProbability().
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||
|
||||
// Passed to PatriciaTrieReadingUtils::createAndGetFlags() when node flags are built.
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
|
||||
// Buffer that PtNodes are written into.
BufferWithExtendableBuffer *const mBuffer;
|
||||
const DynamicPatriciaTrieNodeReader *const mPtNodeReader;
|
||||
// Constructed from mBuffer and the node reader; see the constructor note on member order.
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
||||
// Used to copy bigram lists while writing a PtNode.
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
// Used to copy shortcut lists while writing a PtNode.
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||
const bool mNeedsToDecayWhenUpdating;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */
|
|
@ -27,25 +27,11 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Query strings accepted by DynamicPatriciaTriePolicy::getProperty(), followed by the size
// limits used by the dynamic-update and GC checks of this policy.
// Note that there are corresponding definitions on the Java side in BinaryDictionaryTests and
|
||||
// BinaryDictionaryDecayingTests.
|
||||
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||
const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
|
||||
const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
|
||||
// Unlike the other queries, this one makes getProperty() set mNeedsToDecayForTesting instead
// of writing a result.
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
|
||||
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
||||
// needsToRunGC() requests GC once the header's extended region size plus the used additional
// buffer size exceeds this value (32 * 1024).
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 32 * 1024;
|
||||
// Tail position at or beyond which add/remove operations are refused: 1024 below the
// maximum dictionary size.
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||
|
||||
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const {
|
||||
if (!dicNode->hasChildren()) {
|
||||
|
@ -140,163 +126,4 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) c
|
|||
return ptNodeParams.getBigramsPos();
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||
const int probability, const int timestamp) {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update.");
|
||||
return false;
|
||||
}
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
bool addedNewUnigram = false;
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
|
||||
&addedNewUnigram)) {
|
||||
if (addedNewUnigram) {
|
||||
mUnigramCount++;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||
const int *const word1, const int length1, const int probability,
|
||||
const int timestamp) {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update.");
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
bool addedNewBigram = false;
|
||||
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
|
||||
&addedNewBigram)) {
|
||||
if (addedNewBigram) {
|
||||
mBigramCount++;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
||||
const int *const word1, const int length1) {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update.");
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||
mBigramCount--;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||
}
|
||||
|
||||
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
const bool needsToDecay = mHeaderPolicy.isDecayingDict()
|
||||
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
||||
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
||||
&mShortcutListPolicy, needsToDecay);
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||
mNeedsToDecayForTesting = false;
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
|
||||
// Additional buffer size is near the limit.
|
||||
return true;
|
||||
} else if (mHeaderPolicy.getExtendedRegionSize()
|
||||
+ mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
|
||||
> MAX_DICT_EXTENDED_REGION_SIZE) {
|
||||
// Total extended region size exceeds the limit.
|
||||
return true;
|
||||
} else if (mBufferWithExtendableBuffer.getTailPosition()
|
||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
|
||||
&& mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
|
||||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy.isDecayingDict()) {
|
||||
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
||||
mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void DynamicPatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
|
||||
char *const outResult, const int maxResultLength) {
|
||||
const int compareLength = queryLength + 1 /* terminator */;
|
||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
|
||||
static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
|
||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
|
||||
static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
|
||||
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
|
||||
mNeedsToDecayForTesting = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -23,8 +23,6 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
@ -47,12 +45,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||
mHeaderPolicy.isDecayingDict()),
|
||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
||||
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
||||
&mShortcutListPolicy, mHeaderPolicy.isDecayingDict()),
|
||||
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter),
|
||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy) {}
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
return 0;
|
||||
|
@ -89,45 +82,59 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||
const int timestamp);
|
||||
const int timestamp) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
const int length1, const int probability, const int timestamp);
|
||||
const int length1, const int probability, const int timestamp) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
const int length1);
|
||||
const int length1) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
|
||||
void flush(const char *const filePath);
|
||||
void flush(const char *const filePath) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||
}
|
||||
|
||||
void flushWithGC(const char *const filePath);
|
||||
void flushWithGC(const char *const filePath) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||
}
|
||||
|
||||
bool needsToRunGC(const bool mindsBlockByGC) const;
|
||||
bool needsToRunGC(const bool mindsBlockByGC) const {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
|
||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||
const int maxResultLength);
|
||||
const int maxResultLength) {
|
||||
// getProperty is not supported for this class.
|
||||
if (maxResultLength > 0) {
|
||||
outResult[0] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||
|
||||
static const char *const UNIGRAM_COUNT_QUERY;
|
||||
static const char *const BIGRAM_COUNT_QUERY;
|
||||
static const char *const MAX_UNIGRAM_COUNT_QUERY;
|
||||
static const char *const MAX_BIGRAM_COUNT_QUERY;
|
||||
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
||||
const HeaderPolicy mHeaderPolicy;
|
||||
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||
DynamicBigramListPolicy mBigramListPolicy;
|
||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||
DynamicPatriciaTrieNodeWriter mNodeWriter;
|
||||
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
|
||||
int mUnigramCount;
|
||||
int mBigramCount;
|
||||
int mNeedsToDecayForTesting;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||
|
|
|
@ -1,142 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Upper bound on dictionary size (currently 2 * 1024 * 1024); writeToDictFileWithGC() uses it
// as the size argument of the buffer that receives the GCed dictionary.
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
||||
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
||||
mBuffer->getUsedAdditionalBufferSize();
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
|
||||
return;
|
||||
}
|
||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
|
||||
}
|
||||
|
||||
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||
const char *const fileName, const HeaderPolicy *const headerPolicy) {
|
||||
BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
|
||||
int unigramCount = 0;
|
||||
int bigramCount = 0;
|
||||
if (mNeedsToDecay) {
|
||||
ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
|
||||
}
|
||||
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
||||
return;
|
||||
}
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
||||
return;
|
||||
}
|
||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
||||
}
|
||||
|
||||
// TODO: Make this method version independent.
|
||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
||||
int *const outUnigramCount, int *const outBigramCount) {
|
||||
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
|
||||
DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy,
|
||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners
|
||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||
return false;
|
||||
}
|
||||
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
||||
// TODO: Remove more unigrams.
|
||||
}
|
||||
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateBigramProbability)) {
|
||||
return false;
|
||||
}
|
||||
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
||||
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||
// TODO: Remove more bigrams.
|
||||
}
|
||||
|
||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy,
|
||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
|
||||
&dictPositionRelocationMap);
|
||||
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create policy instance for the GCed dictionary.
|
||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
|
||||
mNeedsToDecay);
|
||||
// Create reading node reader and reading helper for the GCed dictionary.
|
||||
DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
|
||||
&newDictShortcutPolicy);
|
||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
||||
DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader,
|
||||
&newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap);
|
||||
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToUpdateAllPositionFields)) {
|
||||
return false;
|
||||
}
|
||||
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
|
||||
*outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
|
||||
#include <cstddef>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BufferWithExtendableBuffer;
|
||||
class DynamicBigramListPolicy;
|
||||
class DynamicPatriciaTrieReadingHelper;
|
||||
class DynamicShortcutListPolicy;
|
||||
class HeaderPolicy;
|
||||
|
||||
// Helper that writes a dynamic patricia trie dictionary to a file, either as is
// (writeToDictFile) or after garbage collection (writeToDictFileWithGC).
// TODO: Make it independent from a particular format and move to pt_common.
|
||||
class DynamicPatriciaTrieWritingHelper {
|
||||
public:
|
||||
// Defined in the .cpp file; used there as the size argument of the buffer that receives
// the GCed dictionary.
static const size_t MAX_DICTIONARY_SIZE;
|
||||
|
||||
// Stores the given pointers without taking ownership. needsToDecay is consulted by the GC
// path (time keeper refresh, header write, listeners).
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||
mNeedsToDecay(needsToDecay) {}
|
||||
|
||||
~DynamicPatriciaTrieWritingHelper() {}
|
||||
|
||||
// Writes a new header (with the given counts) followed by the current body buffer to
// fileName. Returns nothing; a header serialization failure silently skips the write.
void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
|
||||
const int unigramCount, const int bigramCount);
|
||||
|
||||
// Runs GC starting from rootPtNodeArrayPos and writes header plus GCed body to fileName.
// Returns nothing; a GC or header serialization failure silently skips the write.
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
||||
const HeaderPolicy *const headerPolicy);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||
|
||||
// Body buffer of the dictionary being written / garbage-collected.
BufferWithExtendableBuffer *const mBuffer;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||
const bool mNeedsToDecay;
|
||||
|
||||
// Garbage-collects the trie in mBuffer into bufferToWrite and reports the resulting
// unigram and bigram counts. Returns false when any traversal fails.
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
||||
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
||||
int *const outBigramCount);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
|
@ -33,7 +33,6 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
|
|||
const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
|
||||
const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
|
||||
const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
|
||||
const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
|
||||
const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
|
||||
|
||||
/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
|
||||
|
@ -101,19 +100,6 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
|
|||
hasMultipleCodePoints, codePointFieldPos);
|
||||
}
|
||||
|
||||
// Writes a probability value into the buffer at *probabilityFieldPos.
//
// The probability must be within [0, MAX_PROBABILITY]; otherwise the function logs the value,
// trips ASSERT(false) and returns false without touching the buffer.
// For a valid value, the result of buffer->writeUintAndAdvancePosition() is returned as-is;
// the field width passed to it is PROBABILITY_FIELD_SIZE.
/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
||||
BufferWithExtendableBuffer *const buffer, const int probability,
|
||||
int *const probabilityFieldPos) {
|
||||
// Reject out-of-range values before anything is written.
if (probability < 0 || probability > MAX_PROBABILITY) {
|
||||
AKLOGI("probability cannot be written because the probability is invalid: %d",
|
||||
probability);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
|
||||
probabilityFieldPos);
|
||||
}
|
||||
|
||||
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
|
||||
int *const childrenPositionFieldPos) {
|
||||
|
|
|
@ -49,9 +49,6 @@ class DynamicPatriciaTrieWritingUtils {
|
|||
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
||||
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
|
||||
|
||||
static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
||||
const int probability, int *const probabilityFieldPos);
|
||||
|
||||
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
||||
const int childrenPosition, int *const childrenPositionFieldPos);
|
||||
|
||||
|
@ -67,7 +64,6 @@ class DynamicPatriciaTrieWritingUtils {
|
|||
static const int MAX_DICT_OFFSET_VALUE;
|
||||
static const int MIN_DICT_OFFSET_VALUE;
|
||||
static const int DICT_OFFSET_NEGATIVE_FLAG;
|
||||
static const int PROBABILITY_FIELD_SIZE;
|
||||
|
||||
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
|
||||
const int basePos, int *const offsetFieldPos);
|
||||
|
|
|
@ -32,8 +32,6 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
|
||||
const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||
switch (dictVersion) {
|
||||
case 3:
|
||||
return createEmptyV3DictFile(filePath, attributeMap);
|
||||
case 4:
|
||||
return createEmptyV4DictFile(filePath, attributeMap);
|
||||
default:
|
||||
|
@ -43,23 +41,6 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
|||
}
|
||||
}
|
||||
|
||||
// Builds an empty version-3 dictionary in memory (header buffer + body buffer) and hands both
// buffers to flushAllHeaderAndBodyToFile() for filePath.
//
// Returns false if the empty body structure cannot be written into the body buffer; otherwise
// returns whatever flushAllHeaderAndBodyToFile() returns.
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
||||
// Header for a brand-new dictionary: both timestamps are updated and all counts are zero.
// NOTE(review): the result of writeHeaderToBuffer() is not checked here; whether it reports
// a status is not visible from this block.
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
||||
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
||||
0 /* extendedRegionSize */);
|
||||
BufferWithExtendableBuffer bodyBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
// The empty trie body is written starting at position 0 of the body buffer.
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
||||
AKLOGE("Empty ver3 dictionary structure cannot be created on memory.");
|
||||
return false;
|
||||
}
|
||||
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
|
||||
}
|
||||
|
||||
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
|
||||
|
|
|
@ -43,9 +43,6 @@ class DictFileWritingUtils {
|
|||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
||||
|
||||
static bool createEmptyV3DictFile(const char *const filePath,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||
|
||||
static bool createEmptyV4DictFile(const char *const filePath,
|
||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||
|
||||
|
|
|
@ -74,9 +74,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
|
||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||
final int formatVersion) throws IOException {
|
||||
if (formatVersion == 3) {
|
||||
return createEmptyVer3DictionaryAndGetFile(dictId);
|
||||
} else if (formatVersion == 4) {
|
||||
if (formatVersion == 4) {
|
||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||
} else {
|
||||
throw new IOException("Dictionary format version " + formatVersion
|
||||
|
@ -102,26 +100,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Creates an empty version-3 dictionary file in the context's cache directory and returns it.
 * The header attributes mark the dictionary as supporting dynamic updates and as using the
 * forgetting curve.
 *
 * @param dictId used as the prefix of the temporary file name
 * @return the file that now holds the empty dictionary
 * @throws IOException if the temporary file cannot be created or if
 *         BinaryDictionary.createEmptyDictFile() reports failure
 */
private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
|
||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
// createTempFile() creates the file on disk; it is removed again so that only the unique
// path is kept. Presumably the dictionary creation below expects the path not to exist yet
// -- confirm. The result of delete() is ignored.
file.delete();
|
||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
|
||||
attributeMap)) {
|
||||
return file;
|
||||
} else {
|
||||
throw new IOException(
|
||||
"Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddValidAndInvalidWords(int) once with format version 3 and
 * once with format version 4.
 */
public void testAddValidAndInvalidWords() {
|
||||
testAddValidAndInvalidWords(3 /* formatVersion */);
|
||||
testAddValidAndInvalidWords(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -172,7 +151,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testDecayingProbability(int) once with format version 3 and once
 * with format version 4.
 */
public void testDecayingProbability() {
|
||||
testDecayingProbability(3 /* formatVersion */);
|
||||
testDecayingProbability(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -231,7 +209,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddManyUnigramsToDecayingDict(int) once with format version 3
 * and once with format version 4.
 */
public void testAddManyUnigramsToDecayingDict() {
|
||||
testAddManyUnigramsToDecayingDict(3 /* formatVersion */);
|
||||
testAddManyUnigramsToDecayingDict(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -288,7 +265,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddManyBigramsToDecayingDict(int) once with format version 3
 * and once with format version 4.
 */
public void testAddManyBigramsToDecayingDict() {
|
||||
testAddManyBigramsToDecayingDict(3 /* formatVersion */);
|
||||
testAddManyBigramsToDecayingDict(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
|
|
@ -52,9 +52,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||
final int formatVersion) throws IOException {
|
||||
if (formatVersion == 3) {
|
||||
return createEmptyVer3DictionaryAndGetFile(dictId);
|
||||
} else if (formatVersion == 4) {
|
||||
if (formatVersion == 4) {
|
||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||
} else {
|
||||
throw new IOException("Dictionary format version " + formatVersion
|
||||
|
@ -79,24 +77,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Creates an empty version-3 dictionary file in the context's cache directory and returns it.
 * The only header attribute set marks the dictionary as supporting dynamic updates.
 *
 * @param dictId used as the prefix of the temporary file name
 * @return the file that now holds the empty dictionary
 * @throws IOException if the temporary file cannot be created or if
 *         BinaryDictionary.createEmptyDictFile() reports failure
 */
private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
|
||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
// createTempFile() creates the file on disk; it is removed again so that only the unique
// path is kept. Presumably the dictionary creation below expects the path not to exist yet
// -- confirm. The result of delete() is ignored.
file.delete();
|
||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
|
||||
attributeMap)) {
|
||||
return file;
|
||||
} else {
|
||||
throw new IOException(
|
||||
"Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test entry point: calls testIsValidDictionary(int) once with format version 3 and once
 * with format version 4.
 */
public void testIsValidDictionary() {
|
||||
testIsValidDictionary(3 /* formatVersion */);
|
||||
testIsValidDictionary(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -125,7 +106,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddUnigramWord(int) once with format version 3 and once with
 * format version 4.
 */
public void testAddUnigramWord() {
|
||||
testAddUnigramWord(3 /* formatVersion */);
|
||||
testAddUnigramWord(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -168,7 +148,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testRandomlyAddUnigramWord(int) once with format version 3 and
 * once with format version 4.
 */
public void testRandomlyAddUnigramWord() {
|
||||
testRandomlyAddUnigramWord(3 /* formatVersion */);
|
||||
testRandomlyAddUnigramWord(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -205,7 +184,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddBigramWords(int) once with format version 3 and once with
 * format version 4.
 */
public void testAddBigramWords() {
|
||||
testAddBigramWords(3 /* formatVersion */);
|
||||
testAddBigramWords(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -274,7 +252,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testRandomlyAddBigramWords(int) once with format version 3 and
 * once with format version 4.
 */
public void testRandomlyAddBigramWords() {
|
||||
testRandomlyAddBigramWords(3 /* formatVersion */);
|
||||
testRandomlyAddBigramWords(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -336,7 +313,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testRemoveBigramWords(int) once with format version 3 and once
 * with format version 4.
 */
public void testRemoveBigramWords() {
|
||||
testRemoveBigramWords(3 /* formatVersion */);
|
||||
testRemoveBigramWords(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -387,7 +363,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testFlushDictionary(int) once with format version 3 and once with
 * format version 4.
 */
public void testFlushDictionary() {
|
||||
testFlushDictionary(3 /* formatVersion */);
|
||||
testFlushDictionary(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -440,7 +415,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testFlushWithGCDictionary(int) once with format version 3 and
 * once with format version 4.
 */
public void testFlushWithGCDictionary() {
|
||||
testFlushWithGCDictionary(3 /* formatVersion */);
|
||||
testFlushWithGCDictionary(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -489,7 +463,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddBigramWordsAndFlashWithGC(int) once with format version 3
 * and once with format version 4.
 */
public void testAddBigramWordsAndFlashWithGC() {
|
||||
testAddBigramWordsAndFlashWithGC(3 /* formatVersion */);
|
||||
testAddBigramWordsAndFlashWithGC(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -559,7 +532,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testRandomOperationsAndFlashWithGC(int) once with format version 3
 * and once with format version 4.
 */
public void testRandomOperationsAndFlashWithGC() {
|
||||
testRandomOperationsAndFlashWithGC(3 /* formatVersion */);
|
||||
testRandomOperationsAndFlashWithGC(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -671,7 +643,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddManyUnigramsAndFlushWithGC(int) once with format version 3
 * and once with format version 4.
 */
public void testAddManyUnigramsAndFlushWithGC() {
|
||||
testAddManyUnigramsAndFlushWithGC(3 /* formatVersion */);
|
||||
testAddManyUnigramsAndFlushWithGC(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -720,7 +691,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testUnigramAndBigramCount(int) once with format version 3 and
 * once with format version 4.
 */
public void testUnigramAndBigramCount() {
|
||||
testUnigramAndBigramCount(3 /* formatVersion */);
|
||||
testUnigramAndBigramCount(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
@ -780,7 +750,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
/**
 * Test entry point: calls testAddMultipleDictionaryEntries(int) once with format version 3
 * and once with format version 4.
 */
public void testAddMultipleDictionaryEntries() {
|
||||
testAddMultipleDictionaryEntries(3 /* formatVersion */);
|
||||
testAddMultipleDictionaryEntries(4 /* formatVersion */);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue