am dd7d95bd
: Merge "GC step 4. Update all positions in new dict and add a test."
* commit 'dd7d95bd4a18440e332938869337b4c9cce9dbd3': GC step 4. Update all positions in new dict and add a test.
This commit is contained in:
commit
88603e9a7f
11 changed files with 278 additions and 45 deletions
|
@ -101,10 +101,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
|||
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
|
||||
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
|
||||
const int targetPtNodePos, int *const writingPos) {
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
|
||||
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ?
|
||||
targetPtNodePos - (*writingPos + 1) : 0;
|
||||
const BigramFlags flagsToWrite = (offset < 0) ?
|
||||
(flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : flags;
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
|
||||
return false;
|
||||
}
|
||||
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
|
||||
const uint32_t absOffest = abs(offset);
|
||||
const int bigramTargetFieldSize = attributeAddressSize(flags);
|
||||
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -71,7 +72,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
|
|||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
@ -114,7 +116,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
|||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
@ -150,6 +153,54 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
|||
return true;
|
||||
}
|
||||
|
||||
// Updates bigram target PtNode positions in the list after the placing step in GC.
|
||||
bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
||||
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
|
||||
ptNodePositionRelocationMap) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
||||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int bigramEntryPos = *bigramListPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
int bigramTargetPtNodePos;
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
|
||||
bigramListPos);
|
||||
if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
if (usesAdditionalBuffer) {
|
||||
bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
|
||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
|
||||
ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
|
||||
if (it != ptNodePositionRelocationMap->end()) {
|
||||
bigramTargetPtNodePos = it->second;
|
||||
} else {
|
||||
bigramTargetPtNodePos = NOT_A_DICT_POS;
|
||||
}
|
||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||
bigramTargetPtNodePos, &bigramEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
|
||||
const int probability, int *const bigramListPos) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
||||
|
@ -160,7 +211,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
|
|||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
@ -222,7 +274,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bi
|
|||
int bigramEntryCount = 0;
|
||||
do {
|
||||
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
|
||||
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
|
||||
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -51,6 +52,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
|
||||
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
|
||||
|
||||
bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
|
||||
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
|
||||
ptNodePositionRelocationMap);
|
||||
|
||||
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
|
||||
int *const bigramListPos);
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
namespace latinime {
|
||||
|
||||
bool DynamicPatriciaTrieGcEventListeners
|
||||
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||
|
@ -47,11 +47,13 @@ bool DynamicPatriciaTrieGcEventListeners
|
|||
}
|
||||
|
||||
// Writes dummy PtNode array size when the head of PtNode array is read.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
::onDescend(const int ptNodeArrayPos) {
|
||||
mValidPtNodeCount = 0;
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos));
|
||||
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
|
||||
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
|
||||
ptNodeArrayPos, writingPos));
|
||||
// Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
|
||||
// This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
|
||||
mPtNodeArraySizeFieldPos = writingPos;
|
||||
|
@ -60,7 +62,7 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
|
|||
}
|
||||
|
||||
// Write PtNode array terminal and actual PtNode array size.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
::onReadingPtNodeArrayTail() {
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
// Write PtNode array terminal.
|
||||
|
@ -77,17 +79,20 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
|
|||
}
|
||||
|
||||
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
|
||||
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
if (node->isDeleted()) {
|
||||
// Current PtNode is not written in new buffer because it has been deleted.
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(),
|
||||
NOT_A_DICT_POS));
|
||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
||||
node->getHeadPos(), NOT_A_DICT_POS));
|
||||
return true;
|
||||
}
|
||||
int writingPos = mBufferToWrite->getTailPosition();
|
||||
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos));
|
||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
|
||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
|
||||
node->getHeadPos(), writingPos));
|
||||
mValidPtNodeCount++;
|
||||
// Writes current PtNode.
|
||||
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
|
||||
|
@ -95,4 +100,50 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
|
|||
node->getProbability(), &writingPos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints) {
|
||||
// Updates parent position.
|
||||
int parentPos = node->getParentPos();
|
||||
if (parentPos != NOT_A_DICT_POS) {
|
||||
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
|
||||
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
||||
if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
|
||||
parentPos = it->second;
|
||||
}
|
||||
}
|
||||
int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
const int parentPosOffset = (parentPos != NOT_A_DICT_POS) ?
|
||||
parentPos - node->getHeadPos() : NOT_A_DICT_POS;
|
||||
// Write updated parent offset.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBufferToWrite,
|
||||
parentPosOffset, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Updates children position.
|
||||
int childrenPos = node->getChildrenPos();
|
||||
if (childrenPos != NOT_A_DICT_POS) {
|
||||
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
|
||||
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
||||
if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
|
||||
childrenPos = it->second;
|
||||
}
|
||||
}
|
||||
writingPos = node->getChildrenPosFieldPos();
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
|
||||
childrenPos, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Updates bigram target PtNode positions in the bigram list.
|
||||
int bigramsPos = node->getBigramsPos();
|
||||
if (bigramsPos != NOT_A_DICT_POS) {
|
||||
mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
|
||||
&mDictPositionRelocationMap->mPtNodePositionRelocationMap);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -34,16 +34,16 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
|
||||
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
|
||||
// TODO: Concatenate non-terminal PtNodes.
|
||||
class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||
class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||
BufferWithExtendableBuffer *const buffer)
|
||||
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
|
||||
mChildrenValue(0) {}
|
||||
|
||||
~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};
|
||||
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
||||
|
||||
bool onAscend() {
|
||||
if (valueStack.empty()) {
|
||||
|
@ -66,7 +66,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(
|
||||
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);
|
||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
|
||||
|
||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
|
@ -76,10 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
|
||||
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
|
||||
// entries.
|
||||
class ListenerForUpdatingBigramProbability
|
||||
class TraversePolicyToUpdateBigramProbability
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
|
||||
TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
|
||||
: mBigramPolicy(bigramPolicy) {}
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
@ -102,20 +102,21 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
|
||||
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
};
|
||||
|
||||
class ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
ListenerForPlacingAndWritingValidPtNodesToBuffer(
|
||||
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
|
||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||
BufferWithExtendableBuffer *const bufferToWrite,
|
||||
hash_map_compat<int, int> *const positionMap)
|
||||
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||
dictPositionRelocationMap)
|
||||
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
|
||||
mPositionMap(positionMap), mValidPtNodeCount(0),
|
||||
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
|
||||
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
@ -127,20 +128,49 @@ class DynamicPatriciaTrieGcEventListeners {
|
|||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints);
|
||||
|
||||
hash_map_compat<int, int> *getPositionMap() const {
|
||||
return mPositionMap;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer);
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
|
||||
|
||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||
BufferWithExtendableBuffer *const mBufferToWrite;
|
||||
hash_map_compat<int, int> *const mPositionMap;
|
||||
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||
mDictPositionRelocationMap;
|
||||
int mValidPtNodeCount;
|
||||
int mPtNodeArraySizeFieldPos;
|
||||
};
|
||||
|
||||
class TraversePolicyToUpdateAllPositionFields
|
||||
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
TraversePolicyToUpdateAllPositionFields(
|
||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
BufferWithExtendableBuffer *const bufferToWrite,
|
||||
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||
dictPositionRelocationMap)
|
||||
: mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
|
||||
mBufferToWrite(bufferToWrite),
|
||||
mDictPositionRelocationMap(dictPositionRelocationMap) {};
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
||||
bool onDescend(const int ptNodeArrayPos) { return true; }
|
||||
|
||||
bool onReadingPtNodeArrayTail() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
|
||||
const int *const nodeCodePoints);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
|
||||
|
||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
BufferWithExtendableBuffer *const mBufferToWrite;
|
||||
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
|
||||
mDictPositionRelocationMap;
|
||||
};
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
|
||||
};
|
||||
|
|
|
@ -258,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
|||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
// TODO: Implement.
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&mBigramListPolicy, &mShortcutListPolicy);
|
||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTriePolicy::needsToRunGC() const {
|
||||
|
|
|
@ -59,6 +59,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
|
|||
if (!listener->onReadingPtNodeArrayTail()) {
|
||||
return false;
|
||||
}
|
||||
if (mReadingStateStack.size() <= 0) {
|
||||
break;
|
||||
}
|
||||
if (!listener->onAscend()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -101,6 +104,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
|||
if (!listener->onAscend()) {
|
||||
return false;
|
||||
}
|
||||
if (mReadingStateStack.size() <= 0) {
|
||||
break;
|
||||
}
|
||||
popReadingStateFromStack();
|
||||
alreadyVisitedChildren = true;
|
||||
alreadyVisitedAllPtNodesInArray = true;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -221,7 +222,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
|||
while (!readingHelper.isEnd()) {
|
||||
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
|
||||
const int parentOffset = movedPos - childPtNodeWrittenPos;
|
||||
int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
|
||||
int parentOffsetFieldPos = childPtNodeWrittenPos
|
||||
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
|
||||
mBuffer, parentOffset, &parentOffsetFieldPos)) {
|
||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||
|
@ -521,30 +523,48 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners
|
||||
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
|
||||
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
|
||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
this, mBuffer);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
|
||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
|
||||
listenerForupdatingBigramProbability(mBigramPolicy);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||
traversePolicyToUpdateBigramProbability(mBigramPolicy);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&listenerForupdatingBigramProbability)) {
|
||||
&traversePolicyToUpdateBigramProbability)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||
hash_map_compat<int, int> positionMap;
|
||||
DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
|
||||
listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
|
||||
&dictPositionRelocationMap);
|
||||
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: Implement.
|
||||
return false;
|
||||
// Create policy instance for the GCed dictionary.
|
||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
|
||||
// Create reading helper for the GCed dictionary.
|
||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
||||
&newDictShortcutPolicy);
|
||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
|
||||
&dictPositionRelocationMap);
|
||||
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToUpdateAllPositionFields)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -33,6 +34,20 @@ class HeaderPolicy;
|
|||
|
||||
class DynamicPatriciaTrieWritingHelper {
|
||||
public:
|
||||
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
|
||||
typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
|
||||
struct DictPositionRelocationMap {
|
||||
public:
|
||||
DictPositionRelocationMap()
|
||||
: mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
|
||||
|
||||
PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
|
||||
PtNodePositionRelocationMap mPtNodePositionRelocationMap;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
|
||||
};
|
||||
|
||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||
DynamicBigramListPolicy *const bigramPolicy,
|
||||
DynamicShortcutListPolicy *const shortcutPolicy)
|
||||
|
|
|
@ -28,6 +28,8 @@ class BufferWithExtendableBuffer;
|
|||
|
||||
class DynamicPatriciaTrieWritingUtils {
|
||||
public:
|
||||
static const int NODE_FLAG_FIELD_SIZE;
|
||||
|
||||
static bool writeForwardLinkPositionAndAdvancePosition(
|
||||
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
|
||||
int *const forwardLinkFieldPos);
|
||||
|
@ -63,7 +65,6 @@ class DynamicPatriciaTrieWritingUtils {
|
|||
static const int MAX_DICT_OFFSET_VALUE;
|
||||
static const int MIN_DICT_OFFSET_VALUE;
|
||||
static const int DICT_OFFSET_NEGATIVE_FLAG;
|
||||
static const int NODE_FLAG_FIELD_SIZE;
|
||||
static const int PROBABILITY_FIELD_SIZE;
|
||||
|
||||
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset,
|
||||
|
|
|
@ -384,4 +384,51 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
// TODO: Add large tests for BinaryDictionary.flushWithGC().
|
||||
public void testFlushWithGCDictionary() {
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final int unigramProbability = 100;
|
||||
final int bigramProbability = 10;
|
||||
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||
binaryDictionary.flushWithGC();
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
final int probability = binaryDictionary.calculateProbability(unigramProbability,
|
||||
bigramProbability);
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
|
||||
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
|
||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
|
||||
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
|
||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
|
||||
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
|
||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
|
||||
binaryDictionary.flushWithGC();
|
||||
binaryDictionary.close();
|
||||
|
||||
dictFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue