Purge Ver3 dictionary writing in native code.
Bug: 11073222
Change-Id: Iccedd618970b270a1e4178cfb178cd87c8208f3d
main
parent
5fa4ad6a85
commit
e717f64659
|
@ -82,12 +82,10 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
||||||
dynamic_patricia_trie_gc_event_listeners.cpp \
|
dynamic_patricia_trie_gc_event_listeners.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
dynamic_patricia_trie_node_writer.cpp \
|
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
dynamic_patricia_trie_updating_helper.cpp \
|
dynamic_patricia_trie_updating_helper.cpp \
|
||||||
dynamic_patricia_trie_writing_helper.cpp \
|
|
||||||
dynamic_patricia_trie_writing_utils.cpp) \
|
dynamic_patricia_trie_writing_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
ver4_dict_buffers.cpp \
|
ver4_dict_buffers.cpp \
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
|
||||||
|
@ -66,278 +65,6 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
int *const fromPos, int *const toPos, int *const outBigramsCount) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*fromPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
*outBigramsCount = 0;
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int bigramEntryCount = 0;
|
|
||||||
int lastWrittenEntryPos = NOT_A_DICT_POS;
|
|
||||||
do {
|
|
||||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
|
||||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
|
||||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
|
||||||
int originalBigramPos;
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
|
||||||
fromPos);
|
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
|
||||||
// skip invalid bigram entry.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
|
||||||
if (bigramPos == NOT_A_DICT_POS) {
|
|
||||||
// Target PtNode has been invalidated.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lastWrittenEntryPos = *toPos;
|
|
||||||
if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos,
|
|
||||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags),
|
|
||||||
BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
(*outBigramsCount)++;
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
|
||||||
// Makes the last entry the terminal of the list. Updates the flags.
|
|
||||||
if (lastWrittenEntryPos != NOT_A_DICT_POS) {
|
|
||||||
if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */,
|
|
||||||
lastWrittenEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*fromPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
|
|
||||||
// has been deleted or is not a valid terminal.
|
|
||||||
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
|
||||||
int *const bigramListPos, int *const outValidBigramEntryCount) {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int bigramEntryCount = 0;
|
|
||||||
do {
|
|
||||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
|
||||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
|
||||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int bigramEntryPos = *bigramListPos;
|
|
||||||
int originalBigramPos;
|
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
|
||||||
bigramListPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
|
||||||
// This entry has already been removed.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const int bigramTargetNodePos =
|
|
||||||
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
|
||||||
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(
|
|
||||||
bigramTargetNodePos));
|
|
||||||
if (ptNodeParams.isDeleted() || !ptNodeParams.isTerminal()
|
|
||||||
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
|
||||||
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
|
||||||
NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bool isRemoved = false;
|
|
||||||
if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
|
|
||||||
&isRemoved)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!isRemoved) {
|
|
||||||
(*outValidBigramEntryCount) += 1;
|
|
||||||
}
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Updates bigram target PtNode positions in the list after the placing step in GC.
|
|
||||||
bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
|
||||||
const PtNodeWriter::PtNodePositionRelocationMap *const ptNodePositionRelocationMap,
|
|
||||||
int *const outBigramEntryCount) {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int bigramEntryCount = 0;
|
|
||||||
do {
|
|
||||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
|
||||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
|
||||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int bigramEntryPos = *bigramListPos;
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
int bigramTargetPtNodePos;
|
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
|
|
||||||
bigramListPos);
|
|
||||||
if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
|
|
||||||
ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
|
|
||||||
if (it != ptNodePositionRelocationMap->end()) {
|
|
||||||
bigramTargetPtNodePos = it->second;
|
|
||||||
} else {
|
|
||||||
bigramTargetPtNodePos = NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
|
||||||
bigramTargetPtNodePos, &bigramEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
|
||||||
(*outBigramEntryCount) = bigramEntryCount;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
|
|
||||||
const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int bigramEntryCount = 0;
|
|
||||||
do {
|
|
||||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
|
||||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
|
||||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int entryPos = *bigramListPos;
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
entryPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
int originalBigramPos;
|
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos,
|
|
||||||
bigramListPos);
|
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
|
||||||
// Update this bigram entry.
|
|
||||||
*outAddedNewBigram = false;
|
|
||||||
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
|
|
||||||
bigramFlags);
|
|
||||||
const int probabilityToWrite = mIsDecayingDict ?
|
|
||||||
ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
|
||||||
probability) : probability;
|
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
|
||||||
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
|
|
||||||
probabilityToWrite);
|
|
||||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
|
||||||
originalBigramPos, &entryPos);
|
|
||||||
}
|
|
||||||
if (BigramListReadWriteUtils::hasNext(bigramFlags)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// The current last entry is found.
|
|
||||||
// First, update the flags of the last entry.
|
|
||||||
if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
|
|
||||||
*outAddedNewBigram = false;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
// Then, add a new entry after the last entry.
|
|
||||||
*outAddedNewBigram = true;
|
|
||||||
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
|
||||||
// We return directly from the while loop.
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
|
||||||
int *const writingPos) {
|
|
||||||
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
|
||||||
const int probabilityToWrite = mIsDecayingDict ?
|
|
||||||
ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
|
|
||||||
probability;
|
|
||||||
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
|
||||||
probabilityToWrite, false /* hasNext */, writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos);
|
|
||||||
int pos = bigramListPos;
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int bigramEntryCount = 0;
|
|
||||||
do {
|
|
||||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
|
||||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
|
||||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
|
||||||
ASSERT(false);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
int bigramEntryPos = pos;
|
|
||||||
int originalBigramPos;
|
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
|
||||||
if (bigramPos != bigramTargetPos) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Target entry is found. Write an invalid target position to mark the bigram invalid.
|
|
||||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
|
||||||
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos);
|
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
const int originalBigramPos) const {
|
const int originalBigramPos) const {
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
if (originalBigramPos == NOT_A_DICT_POS) {
|
||||||
|
@ -362,33 +89,4 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
return currentPos;
|
return currentPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::updateProbabilityForDecay(
|
|
||||||
const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
|
|
||||||
int *const bigramEntryPos, bool *const outRemoved) const {
|
|
||||||
*outRemoved = false;
|
|
||||||
if (mIsDecayingDict) {
|
|
||||||
// Update bigram probability for decaying.
|
|
||||||
const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
|
||||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy);
|
|
||||||
if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
|
||||||
// Write new probability.
|
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
|
|
||||||
BigramListReadWriteUtils::setProbabilityInFlags(
|
|
||||||
bigramFlags, newProbability);
|
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags,
|
|
||||||
targetPtNodePos, bigramEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Remove current bigram entry.
|
|
||||||
*outRemoved = true;
|
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
|
||||||
NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -50,28 +49,6 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
void skipAllBigrams(int *const bigramListPos) const;
|
void skipAllBigrams(int *const bigramListPos) const;
|
||||||
|
|
||||||
// Copy bigrams from the bigram list that starts at fromPos in mBuffer to toPos in
|
|
||||||
// bufferToWrite and advance these positions after bigram lists. This method skips invalid
|
|
||||||
// bigram entries and write the valid bigram entry count to outBigramsCount.
|
|
||||||
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
|
|
||||||
int *const toPos, int *const outBigramsCount) const;
|
|
||||||
|
|
||||||
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
|
|
||||||
int *const outBigramEntryCount);
|
|
||||||
|
|
||||||
bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
|
||||||
const PtNodeWriter::PtNodePositionRelocationMap *const
|
|
||||||
ptNodePositionRelocationMap, int *const outValidBigramEntryCount);
|
|
||||||
|
|
||||||
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
|
|
||||||
int *const bigramListPos, bool *const outAddedNewBigram);
|
|
||||||
|
|
||||||
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
|
||||||
int *const writingPos);
|
|
||||||
|
|
||||||
// Return whether or not targetBigramPos is found.
|
|
||||||
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
||||||
|
|
||||||
|
@ -85,9 +62,6 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
||||||
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
||||||
|
|
||||||
bool updateProbabilityForDecay(const BigramListReadWriteUtils::BigramFlags bigramFlags,
|
|
||||||
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||||
|
|
|
@ -1,338 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
const int DynamicPatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsDeleted(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
|
||||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
// Read original flags
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
|
||||||
true /* isDeleted */);
|
|
||||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
// Update flags.
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::markPtNodeAsMoved(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const int movedPos, const int bigramLinkedNodePos) {
|
|
||||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
|
|
||||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
// Read original flags
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
|
||||||
false /* isDeleted */);
|
|
||||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
|
||||||
// Update flags.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update moved position, which is stored in the parent offset field.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
|
||||||
mBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update bigram linked node position, which is stored in the children position field.
|
|
||||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
|
||||||
mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
|
||||||
// Update children's parent position.
|
|
||||||
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
|
||||||
while (!mReadingHelper.isEnd()) {
|
|
||||||
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
|
|
||||||
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
|
||||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
|
||||||
mBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
|
||||||
&parentOffsetFieldPos)) {
|
|
||||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
|
||||||
// we give up to update dictionary.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
mReadingHelper.readNextSiblingNode(childPtNodeParams);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::updatePtNodeProbability(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
|
|
||||||
const int timestamp) {
|
|
||||||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int probabilityToWrite = getUpdatedProbability(toBeUpdatedPtNodeParams->getProbability(),
|
|
||||||
newProbability);
|
|
||||||
int probabilityFieldPos = toBeUpdatedPtNodeParams->getProbabilityFieldPos();
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
|
||||||
probabilityToWrite, &probabilityFieldPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::updateChildrenPosition(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
|
|
||||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
|
||||||
newChildrenPosition, &childrenPosFieldPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
|
||||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
|
||||||
return writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams,
|
|
||||||
0 /* outProbabilityFieldPos */, ptNodeWritingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
|
||||||
const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
|
|
||||||
int probabilityFieldPos = NOT_A_DICT_POS;
|
|
||||||
if (!writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(ptNodeParams, &probabilityFieldPos,
|
|
||||||
ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (probabilityFieldPos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int probabilityToWrite = getUpdatedProbability(
|
|
||||||
NOT_A_PROBABILITY /* originalProbability */, ptNodeParams->getProbability());
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
|
||||||
probabilityToWrite, &probabilityFieldPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::addNewBigramEntry(
|
|
||||||
const PtNodeParams *const sourcePtNodeParams,
|
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
|
||||||
bool *const outAddedNewBigram) {
|
|
||||||
const int newNodePos = mBuffer->getTailPosition();
|
|
||||||
int writingPos = newNodePos;
|
|
||||||
// Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
|
|
||||||
if (!writePtNodeAndAdvancePosition(sourcePtNodeParams, &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!markPtNodeAsMoved(sourcePtNodeParams, newNodePos, newNodePos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const PtNodeParams newPtNodeParams(
|
|
||||||
mPtNodeReader->fetchNodeInfoInBufferFromPtNodePos(newNodePos));
|
|
||||||
if (newPtNodeParams.getBigramsPos() != NOT_A_DICT_POS) {
|
|
||||||
// Insert a new bigram entry into the existing bigram list.
|
|
||||||
int bigramListPos = newPtNodeParams.getBigramsPos();
|
|
||||||
return mBigramPolicy->addNewBigramEntryToBigramList(targetPtNodeParam->getHeadPos(),
|
|
||||||
probability, &bigramListPos, outAddedNewBigram);
|
|
||||||
} else {
|
|
||||||
// The PtNode doesn't have a bigram list.
|
|
||||||
*outAddedNewBigram = true;
|
|
||||||
// First, Write a bigram entry at the tail position of the PtNode.
|
|
||||||
if (!mBigramPolicy->writeNewBigramEntry(targetPtNodeParam->getHeadPos(), probability,
|
|
||||||
&writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Then, Mark as the PtNode having bigram list in the flags.
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(newPtNodeParams.isBlacklisted(),
|
|
||||||
newPtNodeParams.isNotAWord(), newPtNodeParams.isTerminal(),
|
|
||||||
newPtNodeParams.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
|
|
||||||
newPtNodeParams.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
|
|
||||||
writingPos = newNodePos;
|
|
||||||
// Write updated flags into the moved PtNode's flags field.
|
|
||||||
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
|
|
||||||
&writingPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::removeBigramEntry(
|
|
||||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
|
||||||
if (sourcePtNodeParams->getBigramsPos() == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return mBigramPolicy->removeBigram(sourcePtNodeParams->getBigramsPos(),
|
|
||||||
targetPtNodeParam->getHeadPos());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
|
||||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
|
|
||||||
int bigramListPos = sourcePtNodeParams->getBigramsPos();
|
|
||||||
return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&bigramListPos,
|
|
||||||
outBigramEntryCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::updateAllPositionFields(
|
|
||||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
|
||||||
int *const outBigramEntryCount) {
|
|
||||||
int parentPos = toBeUpdatedPtNodeParams->getParentPos();
|
|
||||||
if (parentPos != NOT_A_DICT_POS) {
|
|
||||||
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
|
|
||||||
dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
|
||||||
if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
|
|
||||||
parentPos = it->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
|
|
||||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
|
||||||
// Write an updated offset to the parent PtNode.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
|
||||||
parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Updates children position that is a PtNodeArray position.
|
|
||||||
int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
|
|
||||||
if (childrenPos != NOT_A_DICT_POS) {
|
|
||||||
PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
|
|
||||||
dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
|
||||||
if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
|
|
||||||
childrenPos = it->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Updates bigram target PtNode positions in the bigram list.
|
|
||||||
int bigramsPos = toBeUpdatedPtNodeParams->getBigramsPos();
|
|
||||||
int bigramCount = 0;
|
|
||||||
if (bigramsPos != NOT_A_DICT_POS) {
|
|
||||||
int bigramEntryCount;
|
|
||||||
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
|
|
||||||
&dictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bigramCount += bigramEntryCount;
|
|
||||||
}
|
|
||||||
if (outBigramEntryCount) {
|
|
||||||
*outBigramEntryCount = bigramCount;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTrieNodeWriter::writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
|
||||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
|
||||||
int *const ptNodeWritingPos) {
|
|
||||||
const int nodePos = *ptNodeWritingPos;
|
|
||||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
|
||||||
// PtNode writing.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer,
|
|
||||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Calculate a parent offset and write the offset.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBuffer,
|
|
||||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write code points
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer,
|
|
||||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write probability when the probability is a valid probability, which means this node is
|
|
||||||
// terminal.
|
|
||||||
if (ptNodeParams->isTerminal()) {
|
|
||||||
if (outProbabilityFieldPos) {
|
|
||||||
*outProbabilityFieldPos = *ptNodeWritingPos;
|
|
||||||
}
|
|
||||||
if (ptNodeParams->getProbability() == NOT_A_PROBABILITY) {
|
|
||||||
// Write a dummy probability.
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
|
||||||
0 /* probability */, ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
|
||||||
ptNodeParams->getProbability(), ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Write children position
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
|
|
||||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Copy shortcut list when the originalShortcutListPos is valid dictionary position.
|
|
||||||
if (ptNodeParams->getShortcutPos() != NOT_A_DICT_POS) {
|
|
||||||
int fromPos = ptNodeParams->getShortcutPos();
|
|
||||||
if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(mBuffer, &fromPos,
|
|
||||||
ptNodeWritingPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
|
||||||
int bigramCount = 0;
|
|
||||||
if (ptNodeParams->getBigramsPos() != NOT_A_DICT_POS) {
|
|
||||||
int fromPos = ptNodeParams->getBigramsPos();
|
|
||||||
if (!mBigramPolicy->copyAllBigrams(mBuffer, &fromPos, ptNodeWritingPos, &bigramCount)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Create node flags and write them.
|
|
||||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
|
||||||
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(),
|
|
||||||
ptNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
|
|
||||||
bigramCount > 0 /* hasBigrams */,
|
|
||||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
|
||||||
CHILDREN_POSITION_FIELD_SIZE);
|
|
||||||
int flagsFieldPos = nodePos;
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
|
||||||
&flagsFieldPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTrieNodeWriter::getUpdatedProbability(const int originalProbability,
|
|
||||||
const int newProbability) const {
|
|
||||||
if (mNeedsToDecayWhenUpdating) {
|
|
||||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
|
||||||
newProbability);
|
|
||||||
} else {
|
|
||||||
return newProbability;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,99 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
|
||||||
class DynamicBigramListPolicy;
|
|
||||||
class DynamicShortcutListPolicy;
|
|
||||||
|
|
||||||
/*
|
|
||||||
 * This class helps write the nodes of a dynamic patricia trie.
|
|
||||||
*/
|
|
||||||
class DynamicPatriciaTrieNodeWriter : public PtNodeWriter {
|
|
||||||
public:
|
|
||||||
DynamicPatriciaTrieNodeWriter(BufferWithExtendableBuffer *const buffer,
|
|
||||||
const DynamicPatriciaTrieNodeReader *const ptNodeReader,
|
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
|
||||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecayWhenUpdating)
|
|
||||||
: mBuffer(buffer), mPtNodeReader(ptNodeReader), mReadingHelper(mBuffer, ptNodeReader),
|
|
||||||
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
|
||||||
mNeedsToDecayWhenUpdating(needsToDecayWhenUpdating) {}
|
|
||||||
|
|
||||||
virtual ~DynamicPatriciaTrieNodeWriter() {}
|
|
||||||
|
|
||||||
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
|
||||||
|
|
||||||
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const int movedPos, const int bigramLinkedNodePos);
|
|
||||||
|
|
||||||
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const int newProbability, const int timestamp);
|
|
||||||
|
|
||||||
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const int newChildrenPosition);
|
|
||||||
|
|
||||||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
|
||||||
int *const ptNodeWritingPos);
|
|
||||||
|
|
||||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
|
||||||
const int timestamp, int *const ptNodeWritingPos);
|
|
||||||
|
|
||||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
|
||||||
const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
|
|
||||||
bool *const outAddedNewBigram);
|
|
||||||
|
|
||||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
|
||||||
const PtNodeParams *const targetPtNodeParam);
|
|
||||||
|
|
||||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
|
||||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
|
|
||||||
|
|
||||||
virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
|
||||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
|
||||||
int *const outBigramEntryCount);
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeWriter);
|
|
||||||
|
|
||||||
bool writePtNodeAndGetProbabilityFieldPosAndAdvancePosition(
|
|
||||||
const PtNodeParams *const ptNodeParams, int *const outProbabilityFieldPos,
|
|
||||||
int *const ptNodeWritingPos);
|
|
||||||
|
|
||||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
|
||||||
|
|
||||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
|
||||||
const DynamicPatriciaTrieNodeReader *const mPtNodeReader;
|
|
||||||
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
|
||||||
const bool mNeedsToDecayWhenUpdating;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_WRITER_H */
|
|
|
@ -27,25 +27,11 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
|
|
||||||
// BinaryDictionaryDecayingTests.
|
|
||||||
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
|
||||||
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
|
||||||
const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
|
|
||||||
const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
|
|
||||||
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
|
|
||||||
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
|
||||||
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 32 * 1024;
|
|
||||||
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
|
||||||
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
|
@ -140,163 +126,4 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) c
|
||||||
return ptNodeParams.getBigramsPos();
|
return ptNodeParams.getBigramsPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
|
||||||
const int probability, const int timestamp) {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
|
||||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
|
||||||
bool addedNewUnigram = false;
|
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp,
|
|
||||||
&addedNewUnigram)) {
|
|
||||||
if (addedNewUnigram) {
|
|
||||||
mUnigramCount++;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
|
||||||
const int *const word1, const int length1, const int probability,
|
|
||||||
const int timestamp) {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
|
||||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool addedNewBigram = false;
|
|
||||||
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, timestamp,
|
|
||||||
&addedNewBigram)) {
|
|
||||||
if (addedNewBigram) {
|
|
||||||
mBigramCount++;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
|
||||||
const int *const word1, const int length1) {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mBufferWithExtendableBuffer.getTailPosition()
|
|
||||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
|
||||||
AKLOGE("The dictionary is too large to dynamically update.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (word1Pos == NOT_A_DICT_POS) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
|
||||||
mBigramCount--;
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
|
||||||
&mBigramListPolicy, &mShortcutListPolicy, false /* needsToDecay */);
|
|
||||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const bool needsToDecay = mHeaderPolicy.isDecayingDict()
|
|
||||||
&& (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
|
||||||
false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy));
|
|
||||||
DynamicBigramListPolicy bigramListPolicyForGC(&mHeaderPolicy, &mBufferWithExtendableBuffer,
|
|
||||||
&mShortcutListPolicy, needsToDecay);
|
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
|
||||||
&bigramListPolicyForGC, &mShortcutListPolicy, needsToDecay);
|
|
||||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
|
||||||
mNeedsToDecayForTesting = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
|
||||||
if (!mMmappedBuffer.get()->isUpdatable()) {
|
|
||||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
|
|
||||||
// Additional buffer size is near the limit.
|
|
||||||
return true;
|
|
||||||
} else if (mHeaderPolicy.getExtendedRegionSize()
|
|
||||||
+ mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
|
|
||||||
> MAX_DICT_EXTENDED_REGION_SIZE) {
|
|
||||||
// Total extended region size exceeds the limit.
|
|
||||||
return true;
|
|
||||||
} else if (mBufferWithExtendableBuffer.getTailPosition()
|
|
||||||
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
|
|
||||||
&& mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
|
|
||||||
// Needs to reduce dictionary size.
|
|
||||||
return true;
|
|
||||||
} else if (mHeaderPolicy.isDecayingDict()) {
|
|
||||||
return mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
|
|
||||||
mindsBlockByGC, mUnigramCount, mBigramCount, &mHeaderPolicy);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
|
|
||||||
char *const outResult, const int maxResultLength) {
|
|
||||||
const int compareLength = queryLength + 1 /* terminator */;
|
|
||||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
|
||||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
|
||||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
|
||||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
|
||||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
|
||||||
snprintf(outResult, maxResultLength, "%d",
|
|
||||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
|
|
||||||
static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
|
|
||||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
|
||||||
snprintf(outResult, maxResultLength, "%d",
|
|
||||||
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
|
|
||||||
static_cast<int>(DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE));
|
|
||||||
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, compareLength) == 0) {
|
|
||||||
mNeedsToDecayForTesting = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -23,8 +23,6 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
@ -47,12 +45,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
mHeaderPolicy.isDecayingDict()),
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy),
|
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy) {}
|
||||||
mNodeWriter(&mBufferWithExtendableBuffer, &mNodeReader, &mBigramListPolicy,
|
|
||||||
&mShortcutListPolicy, mHeaderPolicy.isDecayingDict()),
|
|
||||||
mUpdatingHelper(&mBufferWithExtendableBuffer, &mNodeReader, &mNodeWriter),
|
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()), mNeedsToDecayForTesting(false) {}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -89,45 +82,59 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
bool addUnigramWord(const int *const word, const int length, const int probability,
|
||||||
const int timestamp);
|
const int timestamp) {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1, const int probability, const int timestamp);
|
const int length1, const int probability, const int timestamp) {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||||
const int length1);
|
const int length1) {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void flush(const char *const filePath);
|
void flush(const char *const filePath) {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||||
|
}
|
||||||
|
|
||||||
void flushWithGC(const char *const filePath);
|
void flushWithGC(const char *const filePath) {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||||
|
}
|
||||||
|
|
||||||
bool needsToRunGC(const bool mindsBlockByGC) const;
|
bool needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||||
const int maxResultLength);
|
const int maxResultLength) {
|
||||||
|
// getProperty is not supported for this class.
|
||||||
|
if (maxResultLength > 0) {
|
||||||
|
outResult[0] = '\0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||||
|
|
||||||
static const char *const UNIGRAM_COUNT_QUERY;
|
|
||||||
static const char *const BIGRAM_COUNT_QUERY;
|
|
||||||
static const char *const MAX_UNIGRAM_COUNT_QUERY;
|
|
||||||
static const char *const MAX_BIGRAM_COUNT_QUERY;
|
|
||||||
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
|
||||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
|
||||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
||||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
DynamicShortcutListPolicy mShortcutListPolicy;
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
DynamicBigramListPolicy mBigramListPolicy;
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
DynamicPatriciaTrieNodeReader mNodeReader;
|
||||||
DynamicPatriciaTrieNodeWriter mNodeWriter;
|
|
||||||
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
|
|
||||||
int mUnigramCount;
|
|
||||||
int mBigramCount;
|
|
||||||
int mNeedsToDecayForTesting;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -1,142 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_writer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
|
||||||
#include "utils/hash_map_compat.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
// TODO: Make MAX_DICTIONARY_SIZE 8MB.
|
|
||||||
const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
|
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
|
||||||
const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
|
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
|
||||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
|
|
||||||
mBuffer->getUsedAdditionalBufferSize();
|
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
|
||||||
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
|
||||||
const char *const fileName, const HeaderPolicy *const headerPolicy) {
|
|
||||||
BufferWithExtendableBuffer newDictBuffer(MAX_DICTIONARY_SIZE);
|
|
||||||
int unigramCount = 0;
|
|
||||||
int bigramCount = 0;
|
|
||||||
if (mNeedsToDecay) {
|
|
||||||
ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
|
|
||||||
}
|
|
||||||
if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
|
||||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
|
||||||
mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Make this method version independent.
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|
||||||
const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
int *const outUnigramCount, int *const outBigramCount) {
|
|
||||||
DynamicPatriciaTrieNodeReader ptNodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, &ptNodeReader);
|
|
||||||
DynamicPatriciaTrieNodeWriter ptNodeWriter(mBuffer, &ptNodeReader, mBigramPolicy,
|
|
||||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
||||||
DynamicPatriciaTrieGcEventListeners
|
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
|
||||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
|
||||||
headerPolicy, &ptNodeWriter, mBuffer, mNeedsToDecay);
|
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
|
||||||
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
|
||||||
// TODO: Remove more unigrams.
|
|
||||||
}
|
|
||||||
|
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
|
||||||
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
|
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
|
||||||
&traversePolicyToUpdateBigramProbability)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
|
||||||
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
|
||||||
// TODO: Remove more bigrams.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
|
||||||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
||||||
DynamicPatriciaTrieNodeWriter newPtNodeWriter(bufferToWrite, &ptNodeReader, mBigramPolicy,
|
|
||||||
mShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
|
||||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&newPtNodeWriter, bufferToWrite,
|
|
||||||
&dictPositionRelocationMap);
|
|
||||||
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
|
||||||
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create policy instance for the GCed dictionary.
|
|
||||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
|
||||||
DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
|
|
||||||
mNeedsToDecay);
|
|
||||||
// Create reading node reader and reading helper for the GCed dictionary.
|
|
||||||
DynamicPatriciaTrieNodeReader newDictNodeReader(bufferToWrite, &newDictBigramPolicy,
|
|
||||||
&newDictShortcutPolicy);
|
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictNodeReader);
|
|
||||||
DynamicPatriciaTrieNodeWriter newDictNodeWriter(bufferToWrite, &newDictNodeReader,
|
|
||||||
&newDictBigramPolicy, &newDictShortcutPolicy, false /* needsToDecayWhenUpdating */);
|
|
||||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
|
||||||
traversePolicyToUpdateAllPositionFields(&newDictNodeWriter, &dictPositionRelocationMap);
|
|
||||||
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
|
||||||
&traversePolicyToUpdateAllPositionFields)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
|
|
||||||
*outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
|
@ -1,65 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
|
||||||
|
|
||||||
#include <cstddef>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
|
||||||
class DynamicBigramListPolicy;
|
|
||||||
class DynamicPatriciaTrieReadingHelper;
|
|
||||||
class DynamicShortcutListPolicy;
|
|
||||||
class HeaderPolicy;
|
|
||||||
|
|
||||||
// TODO: Make it independent from a particular format and move to pt_common.
|
|
||||||
class DynamicPatriciaTrieWritingHelper {
|
|
||||||
public:
|
|
||||||
static const size_t MAX_DICTIONARY_SIZE;
|
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
|
||||||
DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
|
|
||||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
|
||||||
mNeedsToDecay(needsToDecay) {}
|
|
||||||
|
|
||||||
~DynamicPatriciaTrieWritingHelper() {}
|
|
||||||
|
|
||||||
void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
|
|
||||||
const int unigramCount, const int bigramCount);
|
|
||||||
|
|
||||||
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
|
|
||||||
const HeaderPolicy *const headerPolicy);
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
|
||||||
const bool mNeedsToDecay;
|
|
||||||
|
|
||||||
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
|
||||||
BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
|
|
||||||
int *const outBigramCount);
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
|
|
@ -33,7 +33,6 @@ const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_FIELD_SIZE = 3;
|
||||||
const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
|
const int DynamicPatriciaTrieWritingUtils::MAX_DICT_OFFSET_VALUE = 0x7FFFFF;
|
||||||
const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
|
const int DynamicPatriciaTrieWritingUtils::MIN_DICT_OFFSET_VALUE = -0x7FFFFF;
|
||||||
const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
|
const int DynamicPatriciaTrieWritingUtils::DICT_OFFSET_NEGATIVE_FLAG = 0x800000;
|
||||||
const int DynamicPatriciaTrieWritingUtils::PROBABILITY_FIELD_SIZE = 1;
|
|
||||||
const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
|
const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
|
||||||
|
|
||||||
/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
|
/* static */ bool DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(
|
||||||
|
@ -101,19 +100,6 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1;
|
||||||
hasMultipleCodePoints, codePointFieldPos);
|
hasMultipleCodePoints, codePointFieldPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Writes probability as an unsigned PROBABILITY_FIELD_SIZE-byte field at *probabilityFieldPos
// through buffer->writeUintAndAdvancePosition() and returns that call's result.
//
// A probability outside [0, MAX_PROBABILITY] is rejected: the value is logged, ASSERT(false)
// fires, and false is returned without touching the buffer.
/* static */ bool DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
        BufferWithExtendableBuffer *const buffer, const int probability,
        int *const probabilityFieldPos) {
    const bool isWritableProbability = probability >= 0 && probability <= MAX_PROBABILITY;
    if (isWritableProbability) {
        return buffer->writeUintAndAdvancePosition(probability, PROBABILITY_FIELD_SIZE,
                probabilityFieldPos);
    }
    AKLOGI("probability cannot be written because the probability is invalid: %d",
            probability);
    ASSERT(false);
    return false;
}
|
|
||||||
|
|
||||||
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
/* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
|
BufferWithExtendableBuffer *const buffer, const int childrenPosition,
|
||||||
int *const childrenPositionFieldPos) {
|
int *const childrenPositionFieldPos) {
|
||||||
|
|
|
@ -49,9 +49,6 @@ class DynamicPatriciaTrieWritingUtils {
|
||||||
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
||||||
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
|
const int *const codePoints, const int codePointCount, int *const codePointFieldPos);
|
||||||
|
|
||||||
static bool writeProbabilityAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
|
||||||
const int probability, int *const probabilityFieldPos);
|
|
||||||
|
|
||||||
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
static bool writeChildrenPositionAndAdvancePosition(BufferWithExtendableBuffer *const buffer,
|
||||||
const int childrenPosition, int *const childrenPositionFieldPos);
|
const int childrenPosition, int *const childrenPositionFieldPos);
|
||||||
|
|
||||||
|
@ -67,7 +64,6 @@ class DynamicPatriciaTrieWritingUtils {
|
||||||
static const int MAX_DICT_OFFSET_VALUE;
|
static const int MAX_DICT_OFFSET_VALUE;
|
||||||
static const int MIN_DICT_OFFSET_VALUE;
|
static const int MIN_DICT_OFFSET_VALUE;
|
||||||
static const int DICT_OFFSET_NEGATIVE_FLAG;
|
static const int DICT_OFFSET_NEGATIVE_FLAG;
|
||||||
static const int PROBABILITY_FIELD_SIZE;
|
|
||||||
|
|
||||||
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
|
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos,
|
||||||
const int basePos, int *const offsetFieldPos);
|
const int basePos, int *const offsetFieldPos);
|
||||||
|
|
|
@ -32,8 +32,6 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
|
/* static */ bool DictFileWritingUtils::createEmptyDictFile(const char *const filePath,
|
||||||
const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||||
switch (dictVersion) {
|
switch (dictVersion) {
|
||||||
case 3:
|
|
||||||
return createEmptyV3DictFile(filePath, attributeMap);
|
|
||||||
case 4:
|
case 4:
|
||||||
return createEmptyV4DictFile(filePath, attributeMap);
|
return createEmptyV4DictFile(filePath, attributeMap);
|
||||||
default:
|
default:
|
||||||
|
@ -43,23 +41,6 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::createEmptyV3DictFile(const char *const filePath,
|
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
|
||||||
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
|
|
||||||
headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
|
|
||||||
true /* updatesLastDecayedTime */, 0 /* unigramCount */, 0 /* bigramCount */,
|
|
||||||
0 /* extendedRegionSize */);
|
|
||||||
BufferWithExtendableBuffer bodyBuffer(
|
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
|
|
||||||
AKLOGE("Empty ver3 dictionary structure cannot be created on memory.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
|
/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath,
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers();
|
||||||
|
|
|
@ -43,9 +43,6 @@ class DictFileWritingUtils {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils);
|
||||||
|
|
||||||
static bool createEmptyV3DictFile(const char *const filePath,
|
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
|
||||||
|
|
||||||
static bool createEmptyV4DictFile(const char *const filePath,
|
static bool createEmptyV4DictFile(const char *const filePath,
|
||||||
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
const HeaderReadWriteUtils::AttributeMap *const attributeMap);
|
||||||
|
|
||||||
|
|
|
@ -74,9 +74,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
|
||||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||||
final int formatVersion) throws IOException {
|
final int formatVersion) throws IOException {
|
||||||
if (formatVersion == 3) {
|
if (formatVersion == 4) {
|
||||||
return createEmptyVer3DictionaryAndGetFile(dictId);
|
|
||||||
} else if (formatVersion == 4) {
|
|
||||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Dictionary format version " + formatVersion
|
throw new IOException("Dictionary format version " + formatVersion
|
||||||
|
@ -102,26 +100,7 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
|
|
||||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
|
||||||
getContext().getCacheDir());
|
|
||||||
file.delete();
|
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
|
|
||||||
attributeMap)) {
|
|
||||||
return file;
|
|
||||||
} else {
|
|
||||||
throw new IOException(
|
|
||||||
"Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testAddValidAndInvalidWords() {
|
public void testAddValidAndInvalidWords() {
|
||||||
testAddValidAndInvalidWords(3 /* formatVersion */);
|
|
||||||
testAddValidAndInvalidWords(4 /* formatVersion */);
|
testAddValidAndInvalidWords(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,7 +151,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDecayingProbability() {
|
public void testDecayingProbability() {
|
||||||
testDecayingProbability(3 /* formatVersion */);
|
|
||||||
testDecayingProbability(4 /* formatVersion */);
|
testDecayingProbability(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -231,7 +209,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddManyUnigramsToDecayingDict() {
|
public void testAddManyUnigramsToDecayingDict() {
|
||||||
testAddManyUnigramsToDecayingDict(3 /* formatVersion */);
|
|
||||||
testAddManyUnigramsToDecayingDict(4 /* formatVersion */);
|
testAddManyUnigramsToDecayingDict(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -288,7 +265,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddManyBigramsToDecayingDict() {
|
public void testAddManyBigramsToDecayingDict() {
|
||||||
testAddManyBigramsToDecayingDict(3 /* formatVersion */);
|
|
||||||
testAddManyBigramsToDecayingDict(4 /* formatVersion */);
|
testAddManyBigramsToDecayingDict(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,9 +52,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
private File createEmptyDictionaryAndGetFile(final String dictId,
|
private File createEmptyDictionaryAndGetFile(final String dictId,
|
||||||
final int formatVersion) throws IOException {
|
final int formatVersion) throws IOException {
|
||||||
if (formatVersion == 3) {
|
if (formatVersion == 4) {
|
||||||
return createEmptyVer3DictionaryAndGetFile(dictId);
|
|
||||||
} else if (formatVersion == 4) {
|
|
||||||
return createEmptyVer4DictionaryAndGetFile(dictId);
|
return createEmptyVer4DictionaryAndGetFile(dictId);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("Dictionary format version " + formatVersion
|
throw new IOException("Dictionary format version " + formatVersion
|
||||||
|
@ -79,24 +77,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private File createEmptyVer3DictionaryAndGetFile(final String dictId) throws IOException {
|
|
||||||
final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
|
|
||||||
getContext().getCacheDir());
|
|
||||||
file.delete();
|
|
||||||
Map<String, String> attributeMap = new HashMap<String, String>();
|
|
||||||
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
|
||||||
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
|
||||||
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 3 /* dictVersion */,
|
|
||||||
attributeMap)) {
|
|
||||||
return file;
|
|
||||||
} else {
|
|
||||||
throw new IOException(
|
|
||||||
"Empty dictionary " + file.getAbsolutePath() + " cannot be created.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testIsValidDictionary() {
|
public void testIsValidDictionary() {
|
||||||
testIsValidDictionary(3 /* formatVersion */);
|
|
||||||
testIsValidDictionary(4 /* formatVersion */);
|
testIsValidDictionary(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -125,7 +106,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddUnigramWord() {
|
public void testAddUnigramWord() {
|
||||||
testAddUnigramWord(3 /* formatVersion */);
|
|
||||||
testAddUnigramWord(4 /* formatVersion */);
|
testAddUnigramWord(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +148,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomlyAddUnigramWord() {
|
public void testRandomlyAddUnigramWord() {
|
||||||
testRandomlyAddUnigramWord(3 /* formatVersion */);
|
|
||||||
testRandomlyAddUnigramWord(4 /* formatVersion */);
|
testRandomlyAddUnigramWord(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,7 +184,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddBigramWords() {
|
public void testAddBigramWords() {
|
||||||
testAddBigramWords(3 /* formatVersion */);
|
|
||||||
testAddBigramWords(4 /* formatVersion */);
|
testAddBigramWords(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,7 +252,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomlyAddBigramWords() {
|
public void testRandomlyAddBigramWords() {
|
||||||
testRandomlyAddBigramWords(3 /* formatVersion */);
|
|
||||||
testRandomlyAddBigramWords(4 /* formatVersion */);
|
testRandomlyAddBigramWords(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -336,7 +313,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRemoveBigramWords() {
|
public void testRemoveBigramWords() {
|
||||||
testRemoveBigramWords(3 /* formatVersion */);
|
|
||||||
testRemoveBigramWords(4 /* formatVersion */);
|
testRemoveBigramWords(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -387,7 +363,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFlushDictionary() {
|
public void testFlushDictionary() {
|
||||||
testFlushDictionary(3 /* formatVersion */);
|
|
||||||
testFlushDictionary(4 /* formatVersion */);
|
testFlushDictionary(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -440,7 +415,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFlushWithGCDictionary() {
|
public void testFlushWithGCDictionary() {
|
||||||
testFlushWithGCDictionary(3 /* formatVersion */);
|
|
||||||
testFlushWithGCDictionary(4 /* formatVersion */);
|
testFlushWithGCDictionary(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -489,7 +463,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddBigramWordsAndFlashWithGC() {
|
public void testAddBigramWordsAndFlashWithGC() {
|
||||||
testAddBigramWordsAndFlashWithGC(3 /* formatVersion */);
|
|
||||||
testAddBigramWordsAndFlashWithGC(4 /* formatVersion */);
|
testAddBigramWordsAndFlashWithGC(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -559,7 +532,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomOperationsAndFlashWithGC() {
|
public void testRandomOperationsAndFlashWithGC() {
|
||||||
testRandomOperationsAndFlashWithGC(3 /* formatVersion */);
|
|
||||||
testRandomOperationsAndFlashWithGC(4 /* formatVersion */);
|
testRandomOperationsAndFlashWithGC(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -671,7 +643,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddManyUnigramsAndFlushWithGC() {
|
public void testAddManyUnigramsAndFlushWithGC() {
|
||||||
testAddManyUnigramsAndFlushWithGC(3 /* formatVersion */);
|
|
||||||
testAddManyUnigramsAndFlushWithGC(4 /* formatVersion */);
|
testAddManyUnigramsAndFlushWithGC(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -720,7 +691,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUnigramAndBigramCount() {
|
public void testUnigramAndBigramCount() {
|
||||||
testUnigramAndBigramCount(3 /* formatVersion */);
|
|
||||||
testUnigramAndBigramCount(4 /* formatVersion */);
|
testUnigramAndBigramCount(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -780,7 +750,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAddMultipleDictionaryEntries() {
|
public void testAddMultipleDictionaryEntries() {
|
||||||
testAddMultipleDictionaryEntries(3 /* formatVersion */);
|
|
||||||
testAddMultipleDictionaryEntries(4 /* formatVersion */);
|
testAddMultipleDictionaryEntries(4 /* formatVersion */);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue