From bcde771489838b0c708040bc0616da7af9a72337 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 22 Nov 2013 19:37:33 +0900 Subject: [PATCH] Implement ver4 dictionary GC. Bug: 11391130 Change-Id: I8a8c5b24b5922df49f27341a2f94d85513bb7166 --- .../bigram/ver4_bigram_list_policy.cpp | 54 +++++++++++++- .../bigram/ver4_bigram_list_policy.h | 5 ++ .../structure/pt_common/pt_node_params.h | 8 +++ ...namic_patricia_trie_gc_event_listeners.cpp | 15 ++-- .../dynamic_patricia_trie_updating_helper.cpp | 3 +- .../v4/content/bigram_dict_content.cpp | 2 +- .../v4/content/probability_dict_content.h | 37 ++++++---- .../v4/content/sparse_table_dict_content.cpp | 16 +++++ .../v4/content/sparse_table_dict_content.h | 2 + .../content/terminal_position_lookup_table.h | 28 +++++--- .../structure/v4/ver4_dict_buffers.h | 4 ++ .../structure/v4/ver4_dict_constants.cpp | 1 + .../structure/v4/ver4_dict_constants.h | 1 + .../v4/ver4_patricia_trie_node_writer.cpp | 56 +++++++++++++-- .../v4/ver4_patricia_trie_policy.cpp | 10 ++- .../v4/ver4_patricia_trie_writing_helper.cpp | 72 +++++++++++++++++++ .../utils/buffer_with_extendable_buffer.cpp | 17 +++++ .../utils/buffer_with_extendable_buffer.h | 2 + .../latin/Ver4BinaryDictionaryTests.java | 42 +++++++++++ 19 files changed, 329 insertions(+), 46 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 94d7f1061..8bbbacd54 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (bigramListPos == NOT_A_DICT_POS) { - // Bigram list does't exist. + // Bigram list doesn't exist. return false; } const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); @@ -118,12 +118,62 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); } +bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, + int *const outBigramCount) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist. + return true; + } + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const int entryPos = readingPos; + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + continue; + } + const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( + targetTerminalId); + if (targetPtNodePos == NOT_A_DICT_POS) { + // Invalidate bigram entry. + int writingPos = entryPos; + return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext, + Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos); + } + } + return true; +} + +int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist. + return 0; + } + int bigramCount = 0; + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) { + bigramCount++; + } + } + return bigramCount; +} + int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const { bool hasNext = true; int invalidEntryPos = NOT_A_DICT_POS; int readingPos = bigramListPos; - while(hasNext) { + while (hasNext) { const int entryPos = readingPos; int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index b3fe13d7d..5b7d5b527 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -44,6 +44,11 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { bool removeEntry(const int terminalId, const int targetTerminalId); + bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, + int *const outBigramCount); + + int getBigramEntryConut(const int terminalId); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h index 6a4cfee3c..d105a3e49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h @@ -148,6 +148,14 @@ class PtNodeParams { return PatriciaTrieReadingUtils::isNotAWord(mFlags); } + AK_FORCE_INLINE bool hasBigrams() const { + return PatriciaTrieReadingUtils::hasBigrams(mFlags); + } + + AK_FORCE_INLINE bool hasShortcutTargets() const { + return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); + } + // Parent node position AK_FORCE_INLINE int getParentPos() const { return mParentPos; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp index 173017dc7..223933f50 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.cpp @@ -67,16 +67,13 @@ bool DynamicPatriciaTrieGcEventListeners bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { - if (!ptNodeParams->isDeleted()) { - int pos = ptNodeParams->getBigramsPos(); - if (pos != NOT_A_DICT_POS) { - int bigramEntryCount = 0; - if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams, - &bigramEntryCount)) { - return false; - } - mValidBigramEntryCount += bigramEntryCount; + if (!ptNodeParams->isDeleted() && ptNodeParams->hasBigrams()) { + int bigramEntryCount = 0; + if (!mPtNodeWriter->updateAllBigramEntriesAndDeleteUselessEntries(ptNodeParams, + &bigramEntryCount)) { + return false; } + mValidBigramEntryCount += bigramEntryCount; } return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp index cbc0d2304..39e1ecaaa 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.cpp @@ -258,8 +258,7 @@ const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), probability != NOT_A_PROBABILITY /* isTerminal */, - originalPtNodeParams->getShortcutPos() != NOT_A_DICT_POS /* hasShortcutTargets */, - originalPtNodeParams->getBigramsPos() != NOT_A_DICT_POS /* hasBigrams */, + originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, probability); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index 999460086..c22b9b455 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -59,7 +59,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) bool hasNext = true; int readingPos = bigramListPos; int writingPos = toPos; - while(hasNext) { + while (hasNext) { int probability = NOT_A_PROBABILITY; int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h index 518376426..73a472a21 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -41,25 +41,29 @@ class ProbabilityDictContent : public SingleDictContent { } bool setProbability(const int terminalId, const int probability) { - if (terminalId < 0 || terminalId > getSize()) { + if (terminalId < 0) { return false; } - if (terminalId == getSize()) { + if (terminalId >= getSize()) { // Write new entry. - int flagWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE); - const int dummyFlags = 0; - // Write dummy flags. - if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags, - Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagWritingPos)) { - return false; + int writingPos = getBuffer()->getTailPosition(); + while (writingPos <= getEntryPos(terminalId)) { + const int dummyFlags = 0; + if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags, + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) { + return false; + } + const int dummyProbability = 0; + if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyProbability, + Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) { + return false; + } } } - int probabilityWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE - + Ver4DictConstants::PROBABILITY_SIZE) - + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE; - return getWritableBuffer()->writeUintAndAdvancePosition(probability, - Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos); + const int probabilityWritingPos = getEntryPos(terminalId) + + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE; + return getWritableBuffer()->writeUint(probability, + Ver4DictConstants::PROBABILITY_SIZE, probabilityWritingPos); } bool flushToFile(const char *const dictDirPath) const { @@ -69,6 +73,11 @@ class ProbabilityDictContent : public SingleDictContent { private: DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); + int getEntryPos(const int terminalId) const { + return terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE); + } + int getSize() const { return getBuffer()->getTailPosition() / (Ver4DictConstants::PROBABILITY_SIZE + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp index c65420614..1c5caa1a7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp @@ -18,6 +18,22 @@ namespace latinime { +bool SparseTableDictContent::copyContent( + const SparseTableDictContent *const sparseTableDictContent) { + if (!mExpandableLookupTableBuffer.copy( + &sparseTableDictContent->mExpandableLookupTableBuffer)) { + return false; + } + if (!mExpandableAddressTableBuffer.copy( + &sparseTableDictContent->mExpandableAddressTableBuffer)) { + return false; + } + if (!mExpandableContentBuffer.copy(&sparseTableDictContent->mExpandableContentBuffer)) { + return false; + } + return true; +} + bool SparseTableDictContent::flush(const char *const dictDirPath, const char *const lookupTableFileName, const char *const addressTableFileName, const char *const contentFileName) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 9a4f1e1c0..151568747 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -75,6 +75,8 @@ class SparseTableDictContent : public DictContent { || mExpandableContentBuffer.isNearSizeLimit(); } + bool copyContent(const SparseTableDictContent *const sparseTableDictContent); + protected: SparseTable *getUpdatableAddressLookupTable() { return &mAddressLookupTable; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index 873b2406c..2940a985e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -44,23 +44,27 @@ class TerminalPositionLookupTable : public SingleDictContent { if (terminalId < 0 || terminalId >= mSize) { return NOT_A_DICT_POS; } - const int readingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; return getBuffer()->readUint(Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, - readingPos) - mHeaderRegionSize; + getEntryPos(terminalId)) - mHeaderRegionSize; } bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) { - if (terminalId < 0 || terminalId > mSize) { + if (terminalId < 0) { return NOT_A_DICT_POS; } - if (terminalId == mSize) { - // Use new terminal id. - mSize += 1; + if (terminalId >= mSize) { + int writingPos = getBuffer()->getTailPosition(); + while(writingPos <= getEntryPos(terminalId)) { + // Write new entry. + getWritableBuffer()->writeUintAndAdvancePosition( + Ver4DictConstants::NOT_A_TERMINAL_ADDRESS, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos); + } + mSize = getBuffer()->getTailPosition() + / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; } - int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; - return getWritableBuffer()->writeUintAndAdvancePosition( - terminalPtNodePos + mHeaderRegionSize, - Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos); + return getWritableBuffer()->writeUint(terminalPtNodePos + mHeaderRegionSize, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId)); } int getNextTerminalId() const { @@ -94,6 +98,10 @@ class TerminalPositionLookupTable : public SingleDictContent { private: DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); + int getEntryPos(const int terminalId) const { + return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + } + int mSize; const int mHeaderRegionSize; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index d3de3c86d..e67bd2edb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -93,6 +93,10 @@ class Ver4DictBuffers { return &mBigramDictContent; } + AK_FORCE_INLINE ShortcutDictContent *getUpdatableShortcutDictContent() { + return &mShortcutDictContent; + } + AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const { return &mShortcutDictContent; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index b4f9553ee..a27d0bc98 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -41,6 +41,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; +const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 64c388791..96d5f6de0 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -41,6 +41,7 @@ class Ver4DictConstants { static const int PROBABILITY_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE; static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + static const int NOT_A_TERMINAL_ADDRESS; static const int TERMINAL_ID_FIELD_SIZE; static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 5b1ddddf5..071792544 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -45,8 +45,17 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( true /* isDeleted */); int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); // Update flags. - return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, - &writingPos); + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->getTerminalId() != NOT_A_DICT_POS) { + // The PtNode is a terminal. Delete entry from the terminal position lookup table. + return mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition( + toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */); + } else { + return true; + } } bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( @@ -171,7 +180,7 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), isTerminal, - false /* hasShortcutTargets */, false /* hasBigrams */, + ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); int flagsFieldPos = nodePos; @@ -198,16 +207,49 @@ bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) { - // TODO: Implement. - return false; + return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries( + sourcePtNodeParams->getTerminalId(), outBigramEntryCount); } bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields( const PtNodeParams *const toBeUpdatedPtNodeParams, const DictPositionRelocationMap *const dictPositionRelocationMap, int *const outBigramEntryCount) { - // TODO: Implement. - return false; + int parentPos = toBeUpdatedPtNodeParams->getParentPos(); + if (parentPos != NOT_A_DICT_POS) { + PtNodeWriter::PtNodePositionRelocationMap::const_iterator it = + dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); + if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { + parentPos = it->second; + } + } + int writingPos = toBeUpdatedPtNodeParams->getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + // Write updated parent offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, + parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + + // Updates children position. + int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos(); + if (childrenPos != NOT_A_DICT_POS) { + PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it = + dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); + if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { + childrenPos = it->second; + } + } + if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) { + return false; + } + + // Counts bigram entries. + if (outBigramEntryCount) { + *outBigramEntryCount = mBigramPolicy->getBigramEntryConut( + toBeUpdatedPtNodeParams->getTerminalId()); + } + return true; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index bcad6fe05..eeab3121a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -223,7 +223,15 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) { } void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { - // TODO: Implement. + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + return; + } + const bool needsToDecay = mHeaderPolicy.isDecayingDict() + && (mNeedsToDecayForTesting || ForgettingCurveUtils::needsToDecay( + false /* mindsBlockByDecay */, mUnigramCount, mBigramCount, &mHeaderPolicy)); + mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy, needsToDecay); + mNeedsToDecayForTesting = false; } bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index 853cf38ad..11adf2c1d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -20,6 +20,7 @@ #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -91,7 +92,78 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieReadingHelper readingHelper(mBuffers->getTrieBuffer(), &ptNodeReader); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( + headerPolicy, &ptNodeWriter, mBuffers->getWritableTrieBuffer(), + needsToDecay); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { + return false; + } + if (needsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more unigrams. + } + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + traversePolicyToUpdateBigramProbability(&ptNodeWriter); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateBigramProbability)) { + return false; + } + if (needsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() + > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { + // TODO: Remove more bigrams. + } + + // Mapping from positions in mBuffer to positions in bufferToWrite. + PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), + buffersToWrite, &ptNodeReader, &bigramPolicy, &shortcutPolicy); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, + buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); + if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { + return false; + } + + // Create policy instances for the GCed dictionary. + Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), + buffersToWrite->getProbabilityDictContent()); + Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getUpdatableBigramDictContent(), + buffersToWrite->getTerminalPositionLookupTable()); + Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getShortcutDictContent(), + buffersToWrite->getTerminalPositionLookupTable()); + Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), + buffersToWrite, &newPtNodeReader, &newBigramPolicy, &newShortcutPolicy); + + if(!buffersToWrite->getUpdatableBigramDictContent()->copyContent( + mBuffers->getBigramDictContent())) { + return false; + } + if(!buffersToWrite->getUpdatableShortcutDictContent()->copyContent( + mBuffers->getShortcutDictContent())) { + return false; + } + + DynamicPatriciaTrieReadingHelper newDictReadingHelper(buffersToWrite->getTrieBuffer(), + &newPtNodeReader); + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); + if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToUpdateAllPositionFields)) { + return false; + } + + // TODO: GC for dict contents. + + *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); + *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index 26eafcd44..4b537da8a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -131,4 +131,21 @@ bool BufferWithExtendableBuffer::checkAndPrepareWriting(const int pos, const int return true; } +bool BufferWithExtendableBuffer::copy(const BufferWithExtendableBuffer *const sourceBuffer) { + int copyingPos = 0; + const int tailPos = sourceBuffer->getTailPosition(); + const int maxDataChunkSize = sizeof(uint32_t); + while (copyingPos < tailPos) { + const int remainingSize = tailPos - copyingPos; + const int copyingSize = (remainingSize >= maxDataChunkSize) ? + maxDataChunkSize : remainingSize; + const uint32_t data = sourceBuffer->readUint(copyingSize, copyingPos); + if (!writeUint(data, copyingSize, copyingPos)) { + return false; + } + copyingPos += copyingSize; + } + return true; +} + } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index ee6107ad7..76be16518 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -100,6 +100,8 @@ class BufferWithExtendableBuffer { bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, const bool writesTerminator, int *const pos); + bool copy(const BufferWithExtendableBuffer *const sourceBuffer); + private: DISALLOW_COPY_AND_ASSIGN(BufferWithExtendableBuffer); diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index b51a86b1c..55177266e 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -297,4 +297,46 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.close(); } + public void testFlushWithGCDictionary() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + File trieFile = null; + try { + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + } }