diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index f8ed2b9aa..00c614c46 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -101,10 +101,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; /* static */ bool BigramListReadWriteUtils::writeBigramEntry( BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags, const int targetPtNodePos, int *const writingPos) { - if (!bufferToWrite->writeUintAndAdvancePosition(flags, 1 /* size */, writingPos)) { + const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? + targetPtNodePos - (*writingPos + 1) : 0; + const BigramFlags flagsToWrite = (offset < 0) ? + (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : flags; + if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) { return false; } - const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? targetPtNodePos - *writingPos : 0; const uint32_t absOffest = abs(offset); const int bigramTargetFieldSize = attributeAddressSize(flags); return bufferToWrite->writeUintAndAdvancePosition(absOffest, bigramTargetFieldSize, diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index ba43bdb10..2e0f97792 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -19,6 +19,7 @@ #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -71,7 +72,8 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -114,7 +116,8 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -150,6 +153,54 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( return true; } +// Updates bigram target PtNode positions in the list after the placing step in GC. +bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap) { + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); + if (usesAdditionalBuffer) { + *bigramListPos -= mBuffer->getOriginalBufferSize(); + } + BigramListReadWriteUtils::BigramFlags bigramFlags; + int bigramEntryCount = 0; + do { + if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + ASSERT(false); + return false; + } + int bigramEntryPos = *bigramListPos; + if (usesAdditionalBuffer) { + bigramEntryPos += mBuffer->getOriginalBufferSize(); + } + int bigramTargetPtNodePos; + // The buffer address can be changed after calling buffer writing methods. + BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( + mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos, + bigramListPos); + if (bigramTargetPtNodePos == NOT_A_DICT_POS) { + continue; + } + if (usesAdditionalBuffer) { + bigramTargetPtNodePos += mBuffer->getOriginalBufferSize(); + } + + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + ptNodePositionRelocationMap->find(bigramTargetPtNodePos); + if (it != ptNodePositionRelocationMap->end()) { + bigramTargetPtNodePos = it->second; + } else { + bigramTargetPtNodePos = NOT_A_DICT_POS; + } + if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, + bigramTargetPtNodePos, &bigramEntryPos)) { + return false; + } + } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + return true; +} + bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, int *const bigramListPos) { const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); @@ -160,7 +211,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } @@ -222,7 +274,8 @@ bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bi int bigramEntryCount = 0; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { - AKLOGE("Too many bigram entries. %d", BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); + AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", + bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); ASSERT(false); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h index 16b080ae5..8ea318a41 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" namespace latinime { @@ -51,6 +52,10 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy { bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos); + bool updateAllBigramTargetPtNodePositions(int *const bigramListPos, + const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const + ptNodePositionRelocationMap); + bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability, int *const bigramListPos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index ffa02e3f6..51374102e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -19,7 +19,7 @@ namespace latinime { bool DynamicPatriciaTrieGcEventListeners - ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints) { // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless @@ -47,11 +47,13 @@ bool DynamicPatriciaTrieGcEventListeners } // Writes dummy PtNode array size when the head of PtNode array is read. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onDescend(const int ptNodeArrayPos) { mValidPtNodeCount = 0; int writingPos = mBufferToWrite->getTailPosition(); - mPositionMap->insert(hash_map_compat::value_type(ptNodeArrayPos, writingPos)); + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type( + ptNodeArrayPos, writingPos)); // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes. // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count. mPtNodeArraySizeFieldPos = writingPos; @@ -60,7 +62,7 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod } // Write PtNode array terminal and actual PtNode array size. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onReadingPtNodeArrayTail() { int writingPos = mBufferToWrite->getTailPosition(); // Write PtNode array terminal. @@ -77,17 +79,20 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod } // Write valid PtNode to buffer and memorize mapping from the old position to the new position. -bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints) { if (node->isDeleted()) { // Current PtNode is not written in new buffer because it has been deleted. - mPositionMap->insert(hash_map_compat::value_type(node->getHeadPos(), - NOT_A_DICT_POS)); + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), NOT_A_DICT_POS)); return true; } int writingPos = mBufferToWrite->getTailPosition(); - mPositionMap->insert(hash_map_compat::value_type(node->getHeadPos(), writingPos)); + mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert( + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type( + node->getHeadPos(), writingPos)); mValidPtNodeCount++; // Writes current PtNode. return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, @@ -95,4 +100,50 @@ bool DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNod node->getProbability(), &writingPos); } +bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints) { + // Updates parent position. + int parentPos = node->getParentPos(); + if (parentPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); + if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { + parentPos = it->second; + } + } + int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + const int parentPosOffset = (parentPos != NOT_A_DICT_POS) ? + parentPos - node->getHeadPos() : NOT_A_DICT_POS; + // Write updated parent offset. + if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBufferToWrite, + parentPosOffset, &writingPos)) { + return false; + } + + // Updates children position. + int childrenPos = node->getChildrenPos(); + if (childrenPos != NOT_A_DICT_POS) { + DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it = + mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); + if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { + childrenPos = it->second; + } + } + writingPos = node->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite, + childrenPos, &writingPos)) { + return false; + } + + // Updates bigram target PtNode positions in the bigram list. + int bigramsPos = node->getBigramsPos(); + if (bigramsPos != NOT_A_DICT_POS) { + mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, + &mDictPositionRelocationMap->mPtNodePositionRelocationMap); + } + + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h index 728559330..4256f22fb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h @@ -34,16 +34,16 @@ class DynamicPatriciaTrieGcEventListeners { // Updates all PtNodes that can be reached from the root. Checks if each PtNode is useless or // not and marks useless PtNodes as deleted. Such deleted PtNodes will be discarded in the GC. // TODO: Concatenate non-terminal PtNodes. - class ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted + class TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( + TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const buffer) : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(), mChildrenValue(0) {} - ~ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted() {}; + ~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {}; bool onAscend() { if (valueStack.empty()) { @@ -66,7 +66,7 @@ class DynamicPatriciaTrieGcEventListeners { private: DISALLOW_IMPLICIT_CONSTRUCTORS( - ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted); + TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBuffer; @@ -76,10 +76,10 @@ class DynamicPatriciaTrieGcEventListeners { // Updates all bigram entries that are held by valid PtNodes. This removes useless bigram // entries. - class ListenerForUpdatingBigramProbability + class TraversePolicyToUpdateBigramProbability : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForUpdatingBigramProbability(DynamicBigramListPolicy *const bigramPolicy) + TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy) : mBigramPolicy(bigramPolicy) {} bool onAscend() { return true; } @@ -102,20 +102,21 @@ class DynamicPatriciaTrieGcEventListeners { } private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForUpdatingBigramProbability); + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability); DynamicBigramListPolicy *const mBigramPolicy; }; - class ListenerForPlacingAndWritingValidPtNodesToBuffer + class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { public: - ListenerForPlacingAndWritingValidPtNodesToBuffer( + TraversePolicyToPlaceAndWriteValidPtNodesToBuffer( DynamicPatriciaTrieWritingHelper *const writingHelper, BufferWithExtendableBuffer *const bufferToWrite, - hash_map_compat *const positionMap) + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) : mWritingHelper(writingHelper), mBufferToWrite(bufferToWrite), - mPositionMap(positionMap), mValidPtNodeCount(0), + mDictPositionRelocationMap(dictPositionRelocationMap), mValidPtNodeCount(0), mPtNodeArraySizeFieldPos(NOT_A_DICT_POS) {}; bool onAscend() { return true; } @@ -127,20 +128,49 @@ class DynamicPatriciaTrieGcEventListeners { bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, const int *const nodeCodePoints); - hash_map_compat *getPositionMap() const { - return mPositionMap; - } - private: - DISALLOW_IMPLICIT_CONSTRUCTORS(ListenerForPlacingAndWritingValidPtNodesToBuffer); + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToPlaceAndWriteValidPtNodesToBuffer); DynamicPatriciaTrieWritingHelper *const mWritingHelper; BufferWithExtendableBuffer *const mBufferToWrite; - hash_map_compat *const mPositionMap; + DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; int mValidPtNodeCount; int mPtNodeArraySizeFieldPos; }; + class TraversePolicyToUpdateAllPositionFields + : public DynamicPatriciaTrieReadingHelper::TraversingEventListener { + public: + TraversePolicyToUpdateAllPositionFields( + DynamicPatriciaTrieWritingHelper *const writingHelper, + DynamicBigramListPolicy *const bigramPolicy, + BufferWithExtendableBuffer *const bufferToWrite, + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + dictPositionRelocationMap) + : mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy), + mBufferToWrite(bufferToWrite), + mDictPositionRelocationMap(dictPositionRelocationMap) {}; + + bool onAscend() { return true; } + + bool onDescend(const int ptNodeArrayPos) { return true; } + + bool onReadingPtNodeArrayTail() { return true; } + + bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node, + const int *const nodeCodePoints); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields); + + DynamicPatriciaTrieWritingHelper *const mWritingHelper; + DynamicBigramListPolicy *const mBigramPolicy; + BufferWithExtendableBuffer *const mBufferToWrite; + const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const + mDictPositionRelocationMap; + }; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieGcEventListeners); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 70a09c245..051da1be3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -258,7 +258,9 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } - // TODO: Implement. + DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, + &mBigramListPolicy, &mShortcutListPolicy); + writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy); } bool DynamicPatriciaTriePolicy::needsToRunGC() const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp index 754b679d5..f4a2ef389 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp @@ -59,6 +59,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPostorderDepthFirstMa if (!listener->onReadingPtNodeArrayTail()) { return false; } + if (mReadingStateStack.size() <= 0) { + break; + } if (!listener->onAscend()) { return false; } @@ -101,6 +104,9 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor if (!listener->onAscend()) { return false; } + if (mReadingStateStack.size() <= 0) { + break; + } popReadingStateFromStack(); alreadyVisitedChildren = true; alreadyVisitedAllPtNodesInArray = true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 3fc9c4ce0..53075e686 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -28,6 +28,7 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "utils/hash_map_compat.h" namespace latinime { @@ -221,7 +222,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( while (!readingHelper.isEnd()) { const int childPtNodeWrittenPos = nodeReader->getHeadPos(); const int parentOffset = movedPos - childPtNodeWrittenPos; - int parentOffsetFieldPos = childPtNodeWrittenPos + 1 /* Flags */; + int parentOffsetFieldPos = childPtNodeWrittenPos + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( mBuffer, parentOffset, &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, @@ -521,30 +523,48 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); DynamicPatriciaTrieGcEventListeners - ::ListenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted - listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted( + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( this, mBuffer); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &listenerForUpdatingUnigramProbabilityAndMarkingUselessPtNodesAsDeleted)) { + &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { return false; } readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPatriciaTrieGcEventListeners::ListenerForUpdatingBigramProbability - listenerForupdatingBigramProbability(mBigramPolicy); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability + traversePolicyToUpdateBigramProbability(mBigramPolicy); if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( - &listenerForupdatingBigramProbability)) { + &traversePolicyToUpdateBigramProbability)) { return false; } // Mapping from positions in mBuffer to positions in bufferToWrite. - hash_map_compat positionMap; + DictPositionRelocationMap dictPositionRelocationMap; readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); - DynamicPatriciaTrieGcEventListeners::ListenerForPlacingAndWritingValidPtNodesToBuffer - listenerForPlacingAndWritingLivingPtNodesToBuffer(this, mBuffer, &positionMap); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, + &dictPositionRelocationMap); + if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { + return false; + } - // TODO: Implement. - return false; + // Create policy instance for the GCed dictionary. + DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); + DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy); + // Create reading helper for the GCed dictionary. + DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, + &newDictShortcutPolicy); + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields + traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, + &dictPositionRelocationMap); + if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToUpdateAllPositionFields)) { + return false; + } + return true; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index e82b80ae5..d8058cc4e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -21,6 +21,7 @@ #include #include "defines.h" +#include "utils/hash_map_compat.h" namespace latinime { @@ -33,6 +34,20 @@ class HeaderPolicy; class DynamicPatriciaTrieWritingHelper { public: + typedef hash_map_compat PtNodeArrayPositionRelocationMap; + typedef hash_map_compat PtNodePositionRelocationMap; + struct DictPositionRelocationMap { + public: + DictPositionRelocationMap() + : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {} + + PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap; + PtNodePositionRelocationMap mPtNodePositionRelocationMap; + + private: + DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); + }; + DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, DynamicBigramListPolicy *const bigramPolicy, DynamicShortcutListPolicy *const shortcutPolicy) diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index 183ede444..536ad3005 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -28,6 +28,8 @@ class BufferWithExtendableBuffer; class DynamicPatriciaTrieWritingUtils { public: + static const int NODE_FLAG_FIELD_SIZE; + static bool writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos); @@ -63,7 +65,6 @@ class DynamicPatriciaTrieWritingUtils { static const int MAX_DICT_OFFSET_VALUE; static const int MIN_DICT_OFFSET_VALUE; static const int DICT_OFFSET_NEGATIVE_FLAG; - static const int NODE_FLAG_FIELD_SIZE; static const int PROBABILITY_FIELD_SIZE; static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset, diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 00d76c990..87901839f 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -384,4 +384,51 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + // TODO: Add large tests for BinaryDictionary.flushWithGC(). + public void testFlushWithGCDictionary() { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int unigramProbability = 100; + final int bigramProbability = 10; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); + assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); + assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + dictFile.delete(); + } }