GC step 4. Update all positions in new dict and add a test.

Bug: 6669677
Change-Id: I035d848a210653dcd3c1a1f21781968adced9225
main
Keisuke Kuroyanagi 2013-09-24 15:29:56 +09:00
parent 0355bf2d4c
commit 2cfe7f9e3b
11 changed files with 278 additions and 45 deletions

View File

@ -101,10 +101,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
const int targetPtNodePos, int *const writingPos) {
if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) {
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ?
targetPtNodePos - (*writingPos + 1) : 0;
const BigramFlags flagsToWrite = (offset < 0) ?
(flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : flags;
if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) {
return false;
}
const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0;
const uint32_t absOffest = abs(offset);
const int bigramTargetFieldSize = attributeAddressSize(flags);
return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize,

View File

@ -19,6 +19,7 @@
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@ -71,7 +72,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@ -114,7 +116,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@ -150,6 +153,54 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
return true;
}
// Updates bigram target PtNode positions in the list after the placing step in GC.
bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
ptNodePositionRelocationMap) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags bigramFlags;
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
int bigramEntryPos = *bigramListPos;
if (usesAdditionalBuffer) {
bigramEntryPos += mBuffer->getOriginalBufferSize();
}
int bigramTargetPtNodePos;
// The buffer address can be changed after calling buffer writing methods.
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos,
bigramListPos);
if (bigramTargetPtNodePos == NOT_A_DICT_POS) {
continue;
}
if (usesAdditionalBuffer) {
bigramTargetPtNodePos += mBuffer->getOriginalBufferSize();
}
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
ptNodePositionRelocationMap->find(bigramTargetPtNodePos);
if (it != ptNodePositionRelocationMap->end()) {
bigramTargetPtNodePos = it->second;
} else {
bigramTargetPtNodePos = NOT_A_DICT_POS;
}
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
bigramTargetPtNodePos, &bigramEntryPos)) {
return false;
}
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
return true;
}
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
const int probability, int *const bigramListPos) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
@ -160,7 +211,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}
@ -222,7 +274,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bi
int bigramEntryCount = 0;
do {
if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) {
AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d",
bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT);
ASSERT(false);
return false;
}

View File

@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
@ -51,6 +52,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
ptNodePositionRelocationMap);
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
int *const bigramListPos);

View File

@ -19,7 +19,7 @@
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
@ -47,11 +47,13 @@ bool DynamicPatriciaTrieGcEventListeners
}
// Writes dummy PtNode array size when the head of PtNode array is read.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onDescend(const int ptNodeArrayPos) {
mValidPtNodeCount = 0;
int writingPos = mBufferToWrite->getTailPosition();
mPositionMap->insert(hash_map_compat<int, int>::value_type(ptNodeArrayPos, writingPos));
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
ptNodeArrayPos, writingPos));
// Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
// This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
mPtNodeArraySizeFieldPos = writingPos;
@ -60,7 +62,7 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
}
// Write PtNode array terminal and actual PtNode array size.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onReadingPtNodeArrayTail() {
int writingPos = mBufferToWrite->getTailPosition();
// Write PtNode array terminal.
@ -77,17 +79,20 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
}
// Write valid PtNode to buffer and memorize mapping from the old position to the new position.
bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
if (node->isDeleted()) {
// Current PtNode is not written in new buffer because it has been deleted.
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(),
NOT_A_DICT_POS));
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
node->getHeadPos(), NOT_A_DICT_POS));
return true;
}
int writingPos = mBufferToWrite->getTailPosition();
mPositionMap->insert(hash_map_compat<int, int>::value_type(node->getHeadPos(), writingPos));
mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
node->getHeadPos(), writingPos));
mValidPtNodeCount++;
// Writes current PtNode.
return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
@ -95,4 +100,50 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod
node->getProbability(), &writingPos);
}
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints) {
// Updates parent position.
int parentPos = node->getParentPos();
if (parentPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
parentPos = it->second;
}
}
int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
const int parentPosOffset = (parentPos != NOT_A_DICT_POS) ?
parentPos - node->getHeadPos() : NOT_A_DICT_POS;
// Write updated parent offset.
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBufferToWrite,
parentPosOffset, &writingPos)) {
return false;
}
// Updates children position.
int childrenPos = node->getChildrenPos();
if (childrenPos != NOT_A_DICT_POS) {
DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
childrenPos = it->second;
}
}
writingPos = node->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
childrenPos, &writingPos)) {
return false;
}
// Updates bigram target PtNode positions in the bigram list.
int bigramsPos = node->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) {
mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
&mDictPositionRelocationMap->mPtNodePositionRelocationMap);
}
return true;
}
} // namespace latinime

View File

@ -34,16 +34,16 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or
// not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC.
// TODO: Concatenate non-terminal PtNodes.
class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const buffer)
: mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
mChildrenValue(0) {}
~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {};
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
bool onAscend() {
if (valueStack.empty()) {
@ -66,7 +66,7 @@ class DynamicPatriciaTrieGcEventListeners {
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(
ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted);
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
@ -76,10 +76,10 @@ class DynamicPatriciaTrieGcEventListeners {
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
// entries.
class ListenerForUpdatingBigramProbability
class TraversePolicyToUpdateBigramProbability
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
: mBigramPolicy(bigramPolicy) {}
bool onAscend() { return true; }
@ -102,20 +102,21 @@ class DynamicPatriciaTrieGcEventListeners {
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability);
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
DynamicBigramListPolicy *const mBigramPolicy;
};
class ListenerForPlacingAndWritingValidPtNodesToBuffer
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
ListenerForPlacingAndWritingValidPtNodesToBuffer(
TraversePolicyToPlaceAndWriteValidPtNodesToBuffer(
DynamicPatriciaTrieWritingHelper *const writingHelper,
BufferWithExtendableBuffer *const bufferToWrite,
hash_map_compat<int, int> *const positionMap)
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
dictPositionRelocationMap)
: mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite),
mPositionMap(positionMap), mValidPtNodeCount(0),
mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0),
mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {};
bool onAscend() { return true; }
@ -127,20 +128,49 @@ class DynamicPatriciaTrieGcEventListeners {
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
hash_map_compat<int, int> *getPositionMap() const {
return mPositionMap;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer);
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBufferToWrite;
hash_map_compat<int, int> *const mPositionMap;
DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
mDictPositionRelocationMap;
int mValidPtNodeCount;
int mPtNodeArraySizeFieldPos;
};
class TraversePolicyToUpdateAllPositionFields
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateAllPositionFields(
DynamicPatriciaTrieWritingHelper *const writingHelper,
DynamicBigramListPolicy *const bigramPolicy,
BufferWithExtendableBuffer *const bufferToWrite,
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
dictPositionRelocationMap)
: mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
mBufferToWrite(bufferToWrite),
mDictPositionRelocationMap(dictPositionRelocationMap) {};
bool onAscend() { return true; }
bool onDescend(const int ptNodeArrayPos) { return true; }
bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
DynamicBigramListPolicy *const mBigramPolicy;
BufferWithExtendableBuffer *const mBufferToWrite;
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
mDictPositionRelocationMap;
};
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners);
};

View File

@ -258,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
// TODO: Implement.
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
&mBigramListPolicy, &mShortcutListPolicy);
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
bool DynamicPatriciaTriePolicy::needsToRunGC() const {

View File

@ -59,6 +59,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa
if (!listener->onReadingPtNodeArrayTail()) {
return false;
}
if (mReadingStateStack.size() <= 0) {
break;
}
if (!listener->onAscend()) {
return false;
}
@ -101,6 +104,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
if (!listener->onAscend()) {
return false;
}
if (mReadingStateStack.size() <= 0) {
break;
}
popReadingStateFromStack();
alreadyVisitedChildren = true;
alreadyVisitedAllPtNodesInArray = true;

View File

@ -28,6 +28,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "utils/hash_map_compat.h"
namespace latinime {
@ -221,7 +222,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
while (!readingHelper.isEnd()) {
const int childPtNodeWrittenPos = nodeReader->getHeadPos();
const int parentOffset = movedPos - childPtNodeWrittenPos;
int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */;
int parentOffsetFieldPos = childPtNodeWrittenPos
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(
mBuffer, parentOffset, &parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
@ -521,30 +523,48 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted
listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted(
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
this, mBuffer);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) {
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability
listenerForupdatingBigramProbability(mBigramPolicy);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(mBigramPolicy);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&listenerForupdatingBigramProbability)) {
&traversePolicyToUpdateBigramProbability)) {
return false;
}
// Mapping from positions in mBuffer to positions in bufferToWrite.
hash_map_compat<int, int> positionMap;
DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer
listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
&dictPositionRelocationMap);
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
return false;
}
// TODO: Implement.
return false;
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
&dictPositionRelocationMap);
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToUpdateAllPositionFields)) {
return false;
}
return true;
}
} // namespace latinime

View File

@ -21,6 +21,7 @@
#include <stdint.h>
#include "defines.h"
#include "utils/hash_map_compat.h"
namespace latinime {
@ -33,6 +34,20 @@ class HeaderPolicy;
class DynamicPatriciaTrieWritingHelper {
public:
typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
struct DictPositionRelocationMap {
public:
DictPositionRelocationMap()
: mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
PtNodePositionRelocationMap mPtNodePositionRelocationMap;
private:
DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
};
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
DynamicBigramListPolicy *const bigramPolicy,
DynamicShortcutListPolicy *const shortcutPolicy)

View File

@ -28,6 +28,8 @@ class BufferWithExtendableBuffer;
class DynamicPatriciaTrieWritingUtils {
public:
static const int NODE_FLAG_FIELD_SIZE;
static bool writeForwardLinkPositionAndAdvancePosition(
BufferWithExtendableBuffer *const buffer, const int forwardLinkPos,
int *const forwardLinkFieldPos);
@ -63,7 +65,6 @@ class DynamicPatriciaTrieWritingUtils {
static const int MAX_DICT_OFFSET_VALUE;
static const int MIN_DICT_OFFSET_VALUE;
static const int DICT_OFFSET_NEGATIVE_FLAG;
static const int NODE_FLAG_FIELD_SIZE;
static const int PROBABILITY_FIELD_SIZE;
static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset,

View File

@ -384,4 +384,51 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
// TODO: Add large tests for BinaryDictionary.flushWithGC().
public void testFlushWithGCDictionary() {
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
} catch (UnsupportedFormatException e) {
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
}
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
final int bigramProbability = 10;
binaryDictionary.addUnigramWord("aaa", unigramProbability);
binaryDictionary.addUnigramWord("abb", unigramProbability);
binaryDictionary.addUnigramWord("bcc", unigramProbability);
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
binaryDictionary.flushWithGC();
binaryDictionary.close();
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int probability = binaryDictionary.calculateProbability(unigramProbability,
bigramProbability);
assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
binaryDictionary.flushWithGC();
binaryDictionary.close();
dictFile.delete();
}
}