Merge "Implement ver4 dictionary GC."

This commit is contained in:
Keisuke Kuroyanagi 2013-11-22 10:53:31 +00:00 committed by Android (Google) Code Review
commit d541d282a4
19 changed files with 329 additions and 46 deletions

View file

@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list does't exist.
// Bigram list doesn't exist.
return false;
}
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
@ -118,12 +118,62 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
}
// Scans the whole bigram list of the given terminal. Every entry whose target terminal no longer
// resolves to a PtNode position is invalidated in place; every entry that is still valid is
// counted.
//
// terminalId: terminal whose bigram list is processed.
// outBigramCount: if non-null, incremented once per entry that remains valid. The value is not
//         reset here, so the caller is expected to initialize it.
// Returns false as soon as writing an invalidated entry fails; returns true otherwise, including
// when the terminal has no bigram list.
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
        int *const outBigramCount) {
    const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (bigramListPos == NOT_A_DICT_POS) {
        // Bigram list doesn't exist.
        return true;
    }
    bool hasNext = true;
    int readingPos = bigramListPos;
    while (hasNext) {
        // Remember where this entry starts; readingPos is advanced past it by the read below.
        const int entryPos = readingPos;
        int probability = NOT_A_PROBABILITY;
        int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
        mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
                &targetTerminalId, &readingPos);
        if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
            // Entry has already been invalidated.
            continue;
        }
        const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
                targetTerminalId);
        if (targetPtNodePos == NOT_A_DICT_POS) {
            // Invalidate bigram entry, then keep going so that the rest of the list is also
            // checked instead of stopping at the first useless entry.
            int writingPos = entryPos;
            if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
                    Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos)) {
                return false;
            }
            continue;
        }
        // The entry survives this pass.
        if (outBigramCount) {
            *outBigramCount += 1;
        }
    }
    return true;
}
// Returns how many entries in the bigram list of the given terminal carry a target terminal id
// other than NOT_A_TERMINAL_ID. Returns 0 when the terminal has no bigram list.
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
    int entryReadingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (entryReadingPos == NOT_A_DICT_POS) {
        // No list to walk.
        return 0;
    }
    int validEntryCount = 0;
    bool hasMoreEntries = true;
    // The list head always holds at least one entry, so read first and test afterwards.
    do {
        int targetId = Ver4DictConstants::NOT_A_TERMINAL_ID;
        mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */,
                &hasMoreEntries, &targetId, &entryReadingPos);
        if (targetId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
            // Invalidated entries are skipped.
            continue;
        }
        ++validEntryCount;
    } while (hasMoreEntries);
    return validEntryCount;
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos) const {
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
while(hasNext) {
while (hasNext) {
const int entryPos = readingPos;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,

View file

@ -44,6 +44,11 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
bool removeEntry(const int terminalId, const int targetTerminalId);
bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
int *const outBigramCount);
int getBigramEntryConut(const int terminalId);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);

View file

@ -148,6 +148,14 @@ class PtNodeParams {
return PatriciaTrieReadingUtils::isNotAWord(mFlags);
}
// Returns the result of PatriciaTrieReadingUtils::hasBigrams() for this PtNode's flags.
AK_FORCE_INLINE bool hasBigrams() const {
return PatriciaTrieReadingUtils::hasBigrams(mFlags);
}
// Returns the result of PatriciaTrieReadingUtils::hasShortcutTargets() for this PtNode's flags.
AK_FORCE_INLINE bool hasShortcutTargets() const {
return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
}
// Parent node position
AK_FORCE_INLINE int getParentPos() const {
return mParentPos;

View file

@ -67,16 +67,13 @@ bool DynamicPatriciaTrieGcEventListeners
bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
if (!ptNodeParams->isDeleted()) {
int pos = ptNodeParams->getBigramsPos();
if (pos != NOT_A_DICT_POS) {
int bigramEntryCount = 0;
if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) {
int bigramEntryCount = 0;
if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams,
&bigramEntryCount)) {
return false;
}
mValidBigramEntryCount += bigramEntryCount;
}
return true;
}

View file

@ -258,8 +258,7 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(),
probability != NOT_A_PROBABILITY /* isTerminal */,
originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */,
originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */,
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(),
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
probability);

View file

@ -59,7 +59,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos)
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
while(hasNext) {
while (hasNext) {
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,

View file

@ -41,25 +41,29 @@ class ProbabilityDictContent : public SingleDictContent {
}
bool setProbability(const int terminalId, const int probability) {
if (terminalId < 0 || terminalId > getSize()) {
if (terminalId < 0) {
return false;
}
if (terminalId == getSize()) {
if (terminalId >= getSize()) {
// Write new entry.
int flagWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ Ver4DictConstants::PROBABILITY_SIZE);
const int dummyFlags = 0;
// Write dummy flags.
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagWritingPos)) {
return false;
int writingPos = getBuffer()->getTailPosition();
while (writingPos <= getEntryPos(terminalId)) {
const int dummyFlags = 0;
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
return false;
}
const int dummyProbability = 0;
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyProbability,
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
return false;
}
}
}
int probabilityWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ Ver4DictConstants::PROBABILITY_SIZE)
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
return getWritableBuffer()->writeUintAndAdvancePosition(probability,
Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos);
const int probabilityWritingPos = getEntryPos(terminalId)
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
return getWritableBuffer()->writeUint(probability,
Ver4DictConstants::PROBABILITY_SIZE, probabilityWritingPos);
}
bool flushToFile(const char *const dictDirPath) const {
@ -69,6 +73,11 @@ class ProbabilityDictContent : public SingleDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
// Byte offset of the entry that belongs to terminalId. Every entry occupies the flags field
// followed by the probability field.
int getEntryPos(const int terminalId) const {
    const int entrySize = Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
            + Ver4DictConstants::PROBABILITY_SIZE;
    return terminalId * entrySize;
}
int getSize() const {
return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE);

View file

@ -18,6 +18,22 @@
namespace latinime {
bool SparseTableDictContent::copyContent(
const SparseTableDictContent *const sparseTableDictContent) {
if (!mExpandableLookupTableBuffer.copy(
&sparseTableDictContent->mExpandableLookupTableBuffer)) {
return false;
}
if (!mExpandableAddressTableBuffer.copy(
&sparseTableDictContent->mExpandableAddressTableBuffer)) {
return false;
}
if (!mExpandableContentBuffer.copy(&sparseTableDictContent->mExpandableContentBuffer)) {
return false;
}
return true;
}
bool SparseTableDictContent::flush(const char *const dictDirPath,
const char *const lookupTableFileName, const char *const addressTableFileName,
const char *const contentFileName) const {

View file

@ -75,6 +75,8 @@ class SparseTableDictContent : public DictContent {
|| mExpandableContentBuffer.isNearSizeLimit();
}
bool copyContent(const SparseTableDictContent *const sparseTableDictContent);
protected:
SparseTable *getUpdatableAddressLookupTable() {
return &mAddressLookupTable;

View file

@ -44,23 +44,27 @@ class TerminalPositionLookupTable : public SingleDictContent {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS;
}
const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
readingPos) - mHeaderRegionSize;
getEntryPos(terminalId)) - mHeaderRegionSize;
}
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) {
if (terminalId < 0 || terminalId > mSize) {
if (terminalId < 0) {
return NOT_A_DICT_POS;
}
if (terminalId == mSize) {
// Use new terminal id.
mSize += 1;
if (terminalId >= mSize) {
int writingPos = getBuffer()->getTailPosition();
while(writingPos <= getEntryPos(terminalId)) {
// Write new entry.
getWritableBuffer()->writeUintAndAdvancePosition(
Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
}
mSize = getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
}
int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
return getWritableBuffer()->writeUintAndAdvancePosition(
terminalPtNodePos + mHeaderRegionSize,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
return getWritableBuffer()->writeUint(terminalPtNodePos + mHeaderRegionSize,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
int getNextTerminalId() const {
@ -94,6 +98,10 @@ class TerminalPositionLookupTable : public SingleDictContent {
private:
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
// Byte offset of the table entry for terminalId; entries are
// TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE bytes each and indexed by terminal id.
int getEntryPos(const int terminalId) const {
return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
}
int mSize;
const int mHeaderRegionSize;
};

View file

@ -93,6 +93,10 @@ class Ver4DictBuffers {
return &mBigramDictContent;
}
// Mutable access to the shortcut dict content owned by these buffers.
AK_FORCE_INLINE ShortcutDictContent *getUpdatableShortcutDictContent() {
return &mShortcutDictContent;
}
// Read-only access to the shortcut dict content owned by these buffers.
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
return &mShortcutDictContent;
}

View file

@ -41,6 +41,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
// Field sizes below are used as byte counts when computing entry offsets in the dict content
// buffers.
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
// Filler value written into newly created terminal address table entries by
// TerminalPositionLookupTable::setTerminalPtNodePosition() before the real address is stored.
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;

View file

@ -41,6 +41,7 @@ class Ver4DictConstants {
static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;

View file

@ -45,8 +45,17 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
true /* isDeleted */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos);
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos)) {
return false;
}
if (toBeUpdatedPtNodeParams->getTerminalId() != NOT_A_DICT_POS) {
// The PtNode is a terminal. Delete entry from the terminal position lookup table.
return mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
} else {
return true;
}
}
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
@ -171,7 +180,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), isTerminal,
false /* hasShortcutTargets */, false /* hasBigrams */,
ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
@ -198,16 +207,49 @@ bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
// Delegates to the bigram policy to update the bigram list attached to the terminal of the given
// PtNode.
//
// sourcePtNodeParams: PtNode whose terminal id selects the bigram list.
// outBigramEntryCount: forwarded unchanged to the bigram policy.
// Returns whatever the bigram policy returns.
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
        const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
    // The former unconditional "return false" stub is gone; it made this call unreachable.
    return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
            sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
}
// Rewrites the parent and children position fields of a PtNode using the relocation map, then
// reports how many bigram entries its terminal has.
//
// toBeUpdatedPtNodeParams: PtNode whose fields are rewritten.
// dictPositionRelocationMap: maps old PtNode / PtNode array positions to new ones. Positions
//         that are absent from the map are written back unchanged.
// outBigramEntryCount: if non-null, set to the bigram entry count reported by the bigram policy
//         for this PtNode's terminal id.
// Returns false if writing either position field fails, true otherwise.
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
        const PtNodeParams *const toBeUpdatedPtNodeParams,
        const DictPositionRelocationMap *const dictPositionRelocationMap,
        int *const outBigramEntryCount) {
    // The former unconditional "return false" stub is gone; it made everything below
    // unreachable.
    int parentPos = toBeUpdatedPtNodeParams->getParentPos();
    if (parentPos != NOT_A_DICT_POS) {
        PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
                dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
        if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
            parentPos = it->second;
        }
    }
    // The parent offset field is located right after the flags field.
    int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
            + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
    // Write updated parent offset.
    if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
            parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
        return false;
    }

    // Updates children position.
    int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
    if (childrenPos != NOT_A_DICT_POS) {
        PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
                dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
        if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
            childrenPos = it->second;
        }
    }
    if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
        return false;
    }

    // Counts bigram entries.
    if (outBigramEntryCount) {
        *outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
                toBeUpdatedPtNodeParams->getTerminalId());
    }
    return true;
}
}

View file

@ -223,7 +223,15 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
}
// Writes the dictionary to filePath through the writing helper's GC path. Does nothing except
// log a warning when the underlying buffers are not updatable. Decay is requested only for
// decaying dictionaries, either when forced for testing or when ForgettingCurveUtils says it is
// due. The testing override is cleared afterwards.
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    if (!mBuffers.get()->isUpdatable()) {
        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
        return;
    }
    bool needsToDecay = false;
    if (mHeaderPolicy.isDecayingDict()) {
        // Evaluated only for decaying dictionaries, and the testing flag is checked first.
        needsToDecay = mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay(
                false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy);
    }
    mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy,
            needsToDecay);
    mNeedsToDecayForTesting = false;
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {

View file

@ -20,6 +20,7 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -91,7 +92,78 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(),
needsToDecay);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams.
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) {
return false;
}
if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams.
}
// Mapping from positions in mBuffer to positions in bufferToWrite.
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
return false;
}
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy);
if(!buffersToWrite->getUpdatableBigramDictContent()->copyContent(
mBuffers->getBigramDictContent())) {
return false;
}
if(!buffersToWrite->getUpdatableShortcutDictContent()->copyContent(
mBuffers->getShortcutDictContent())) {
return false;
}
DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(),
&newPtNodeReader);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToUpdateAllPositionFields)) {
return false;
}
// TODO: GC for dict contents.
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
*outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
return true;
}

View file

@ -131,4 +131,21 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int
return true;
}
// Copies the bytes of sourceBuffer from position 0 up to its tail position into the same
// positions of this buffer, at most sizeof(uint32_t) bytes per write.
// Returns false as soon as one write fails, true once everything has been copied.
bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) {
    const int sourceTailPos = sourceBuffer->getTailPosition();
    const int chunkLimit = sizeof(uint32_t);
    for (int pos = 0; pos < sourceTailPos; ) {
        // The last chunk may be shorter than the limit.
        int chunkSize = sourceTailPos - pos;
        if (chunkSize > chunkLimit) {
            chunkSize = chunkLimit;
        }
        const uint32_t chunk = sourceBuffer->readUint(chunkSize, pos);
        if (!writeUint(chunk, chunkSize, pos)) {
            return false;
        }
        pos += chunkSize;
    }
    return true;
}
}

View file

@ -100,6 +100,8 @@ class BufferWithExtendableBuffer {
bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount,
const bool writesTerminator, int *const pos);
bool copy(const BufferWithExtendableBuffer *const sourceBuffer);
private:
DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer);

View file

@ -297,4 +297,46 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
}
/**
 * Adds a few unigrams and bigrams to a fresh dictionary, flushes it with GC, reopens it and
 * checks that the stored unigrams and bigrams are still there and that bigrams which were never
 * added are not reported as valid.
 */
public void testFlushWithGCDictionary() {
    final String dictVersion = Long.toString(System.currentTimeMillis());
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    }

    BinaryDictionary dict = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

    final int unigramProbability = 100;
    final int bigramProbability = 10;
    final String[] words = { "aaa", "abb", "bcc" };
    final String[][] addedBigrams = {
        { "aaa", "abb" }, { "aaa", "bcc" }, { "abb", "aaa" }, { "abb", "bcc" },
    };
    final String[][] neverAddedBigrams = {
        { "bcc", "aaa" }, { "bcc", "bbc" }, { "aaa", "aaa" },
    };

    for (final String word : words) {
        dict.addUnigramWord(word, unigramProbability);
    }
    for (final String[] bigram : addedBigrams) {
        dict.addBigramWords(bigram[0], bigram[1], bigramProbability);
    }
    dict.flushWithGC();
    dict.close();

    // Reopen the flushed file; its length is read again because GC rewrote it.
    dict = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    final int expectedBigramProbability = dict.calculateProbability(unigramProbability,
            bigramProbability);
    for (final String word : words) {
        assertEquals(unigramProbability, dict.getFrequency(word));
    }
    for (final String[] bigram : addedBigrams) {
        assertEquals(expectedBigramProbability,
                dict.getBigramProbability(bigram[0], bigram[1]));
    }
    for (final String[] bigram : neverAddedBigrams) {
        assertEquals(false, dict.isValidBigram(bigram[0], bigram[1]));
    }
    // A second GC flush on the reopened dictionary.
    dict.flushWithGC();
    dict.close();
}
}