diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp index 00c614c46..1926b9831 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" @@ -78,8 +79,10 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos); break; } - if (offset == 0) { + if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID) { return NOT_A_DICT_POS; + } else if (offset == DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET) { + return origin; } if (isOffsetNegative(flags)) { return origin - offset; @@ -88,6 +91,24 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; } } +/* static */ bool BigramListReadWriteUtils::setHasNextFlag( + BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) { + const bool usesAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos); + int readingPos = entryPos; + if (usesAdditionalBuffer) { + readingPos -= buffer->getOriginalBufferSize(); + } + BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition( + buffer->getBuffer(usesAdditionalBuffer), &readingPos); + if (hasNext) { + bigramFlags = bigramFlags | FLAG_ATTRIBUTE_HAS_NEXT; + } else { + bigramFlags = bigramFlags & (~FLAG_ATTRIBUTE_HAS_NEXT); + } + int writingPos = entryPos; + return buffer->writeUintAndAdvancePosition(bigramFlags, 1 /* size */, &writingPos); +} + /* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry( BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability, const bool hasNext, int *const writingPos) { @@ -101,10 +122,9 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; /* static */ bool BigramListReadWriteUtils::writeBigramEntry( BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags, const int targetPtNodePos, int *const writingPos) { - const int offset = (targetPtNodePos != NOT_A_DICT_POS) ? - targetPtNodePos - (*writingPos + 1) : 0; + const int offset = getBigramTargetOffset(targetPtNodePos, *writingPos); const BigramFlags flagsToWrite = (offset < 0) ? - (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : flags; + (flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE) : (flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE); if (!bufferToWrite->writeUintAndAdvancePosition(flagsToWrite, 1 /* size */, writingPos)) { return false; } @@ -116,14 +136,13 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; // Returns true if the bigram entry is valid and put entry flags into out*. /* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos, - const int targetPos, const int probability, const bool hasNext, + const int targetPtNodePos, const int probability, const bool hasNext, BigramFlags *const outBigramFlags) { BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY; if (hasNext) { flags |= FLAG_ATTRIBUTE_HAS_NEXT; } - const int targetFieldPos = entryPos + 1; - const int offset = (targetPos != NOT_A_DICT_POS) ? targetPos - targetFieldPos : 0; + const int offset = getBigramTargetOffset(targetPtNodePos, entryPos); if (offset < 0) { flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE; } @@ -146,4 +165,18 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4; return true; } +/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos, + const int entryPos) { + if (targetPtNodePos == NOT_A_DICT_POS) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; + } else { + const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */); + if (offset == 0) { + return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; + } else { + return offset; + } + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h index 234a0ea58..eabe4e099 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h @@ -59,9 +59,8 @@ public: */ } - static AK_FORCE_INLINE BigramFlags setHasNextFlag(const BigramFlags flags) { - return flags | FLAG_ATTRIBUTE_HAS_NEXT; - } + static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer, + const bool hasNext, const int entryPos); static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags, const int probability) { @@ -96,6 +95,8 @@ private: static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos); + + static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos); }; } // namespace latinime #endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp index 2e0f97792..bc2f5ee58 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp @@ -70,6 +70,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b *outBigramsCount = 0; BigramListReadWriteUtils::BigramFlags bigramFlags; int bigramEntryCount = 0; + int lastWrittenEntryPos = NOT_A_DICT_POS; do { if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", @@ -90,6 +91,11 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b originalBigramPos += mBuffer->getOriginalBufferSize(); } const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); + if (bigramPos == NOT_A_DICT_POS) { + // Target PtNode has been invalidated. + continue; + } + lastWrittenEntryPos = *toPos; if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos, BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) { @@ -97,6 +103,13 @@ bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const b } (*outBigramsCount)++; } while(BigramListReadWriteUtils::hasNext(bigramFlags)); + // Makes the last entry the terminal of the list. Updates the flags. + if (lastWrittenEntryPos != NOT_A_DICT_POS) { + if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */, + lastWrittenEntryPos)) { + return false; + } + } if (usesAdditionalBuffer) { *fromPos += mBuffer->getOriginalBufferSize(); } @@ -240,10 +253,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg } // The current last entry is found. // First, update the flags of the last entry. - const BigramListReadWriteUtils::BigramFlags updatedFlags = - BigramListReadWriteUtils::setHasNextFlag(bigramFlags); - if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, originalBigramPos, - &entryPos)) { + if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { return false; } if (usesAdditionalBuffer) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp index 51374102e..c60e45819 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp @@ -96,7 +96,7 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNo mValidPtNodeCount++; // Writes current PtNode. return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node, - node->getParentPos(), nodeCodePoints, node->getCodePointCount(), + node->getParentPos(), nodeCodePoints, node->getCodePointCount(), node->getProbability(), &writingPos); } @@ -113,11 +113,9 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField } } int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; - const int parentPosOffset = (parentPos != NOT_A_DICT_POS) ? - parentPos - node->getHeadPos() : NOT_A_DICT_POS; // Write updated parent offset. - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(mBufferToWrite, - parentPosOffset, &writingPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite, + parentPos, node->getHeadPos(), &writingPos)) { return false; } @@ -139,8 +137,10 @@ bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionField // Updates bigram target PtNode positions in the bigram list. int bigramsPos = node->getBigramsPos(); if (bigramsPos != NOT_A_DICT_POS) { - mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, - &mDictPositionRelocationMap->mPtNodePositionRelocationMap); + if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos, + &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) { + return false; + } } return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp index d52de7ada..456352c17 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp @@ -40,9 +40,10 @@ void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNo pos -= mBuffer->getOriginalBufferSize(); } mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); - const int parentPos = - DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); - mParentPos = (parentPos != 0) ? ptNodePos + parentPos : NOT_A_DICT_POS; + const int parentPosOffset = + DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf, + &pos); + mParentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, mHeadPos); if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp index 8428c0b15..d68446db6 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.cpp @@ -28,24 +28,42 @@ const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_NOT_MOVED = 0xC0; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_MOVED = 0x40; const DptReadingUtils::NodeFlags DptReadingUtils::FLAG_IS_DELETED = 0x80; +// TODO: Make DICT_OFFSET_ZERO_OFFSET = 0. +// Currently, DICT_OFFSET_INVALID is 0 in Java side but offset can be 0 during GC. So, the maximum +// value of offsets, which is 0x7FFFFF is used to represent 0 offset. +const int DptReadingUtils::DICT_OFFSET_INVALID = 0; +const int DptReadingUtils::DICT_OFFSET_ZERO_OFFSET = 0x7FFFFF; + /* static */ int DptReadingUtils::getForwardLinkPosition(const uint8_t *const buffer, const int pos) { int linkAddressPos = pos; return ByteArrayUtils::readSint24AndAdvancePosition(buffer, &linkAddressPos); } -/* static */ int DptReadingUtils::getParentPosAndAdvancePosition(const uint8_t *const buffer, - int *const pos) { +/* static */ int DptReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( + const uint8_t *const buffer, int *const pos) { return ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); } +/* static */ int DptReadingUtils::getParentPtNodePos(const int parentOffset, const int ptNodePos) { + if (parentOffset == DICT_OFFSET_INVALID) { + return NOT_A_DICT_POS; + } else if (parentOffset == DICT_OFFSET_ZERO_OFFSET) { + return ptNodePos; + } else { + return parentOffset + ptNodePos; + } +} + /* static */ int DptReadingUtils::readChildrenPositionAndAdvancePosition( const uint8_t *const buffer, int *const pos) { const int base = *pos; const int offset = ByteArrayUtils::readSint24AndAdvancePosition(buffer, pos); - if (offset == 0) { - // 0 offset means that the node does not have children. + if (offset == DICT_OFFSET_INVALID) { + // The PtNode does not have children. return NOT_A_DICT_POS; + } else if (offset == DICT_OFFSET_ZERO_OFFSET) { + return base; } else { return base + offset; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h index db5f9b1bd..67c3cc57e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h @@ -27,13 +27,19 @@ class DynamicPatriciaTrieReadingUtils { public: typedef uint8_t NodeFlags; + static const int DICT_OFFSET_INVALID; + static const int DICT_OFFSET_ZERO_OFFSET; + static int getForwardLinkPosition(const uint8_t *const buffer, const int pos); static AK_FORCE_INLINE bool isValidForwardLinkPosition(const int forwardLinkAddress) { return forwardLinkAddress != 0; } - static int getParentPosAndAdvancePosition(const uint8_t *const buffer, int *const pos); + static int getParentPtNodePosOffsetAndAdvancePosition(const uint8_t *const buffer, + int *const pos); + + static int getParentPtNodePos(const int parentOffset, const int ptNodePos); static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer, int *const pos); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index 53075e686..d2b9a5e97 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -203,9 +203,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( return false; } // Update moved position, which is stored in the parent offset field. - const int movedPosOffset = movedPos - originalNode->getHeadPos(); - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - mBuffer, movedPosOffset, &writingPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { return false; } // Update bigram linked node position, which is stored in the children position field. @@ -220,12 +219,10 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); while (!readingHelper.isEnd()) { - const int childPtNodeWrittenPos = nodeReader->getHeadPos(); - const int parentOffset = movedPos - childPtNodeWrittenPos; - int parentOffsetFieldPos = childPtNodeWrittenPos + int parentOffsetFieldPos = nodeReader->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - mBuffer, parentOffset, &parentOffsetFieldPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mBuffer, movedPos, nodeReader->getHeadPos(), &parentOffsetFieldPos)) { // Parent offset cannot be written because of a bug or a broken dictionary; thus, // we give up to update dictionary. return false; @@ -251,9 +248,8 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( return false; } // Calculate a parent offset and write the offset. - const int parentOffset = (parentPos != NOT_A_DICT_POS) ? parentPos - nodePos : NOT_A_DICT_POS; - if (!DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition(bufferToWrite, - parentOffset, writingPos)) { + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, + parentPos, nodePos, writingPos)) { return false; } // Write code points diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp index b261e594d..5a3983776 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.cpp @@ -39,18 +39,20 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; /* static */ bool DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int forwardLinkPos, int *const forwardLinkFieldPos) { - const int offset = (forwardLinkPos != NOT_A_DICT_POS) ? - forwardLinkPos - (*forwardLinkFieldPos) : 0; - return writeDictOffset(buffer, offset, forwardLinkFieldPos); + return writeDictOffset(buffer, forwardLinkPos, (*forwardLinkFieldPos), forwardLinkFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const size_t arraySize, int *const arraySizeFieldPos) { - if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) { + // Currently, all array size field to be created has LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE to + // simplify updating process. + // TODO: Use SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE for small arrays. + /*if (arraySize <= MAX_PTNODE_ARRAY_SIZE_TO_USE_SMALL_SIZE_FIELD) { return buffer->writeUintAndAdvancePosition(arraySize, SMALL_PTNODE_ARRAY_SIZE_FIELD_SIZE, arraySizeFieldPos); - } else if (arraySize <= MAX_PTNODE_ARRAY_SIZE) { + } else */ + if (arraySize <= MAX_PTNODE_ARRAY_SIZE) { uint32_t data = arraySize | LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG; return buffer->writeUintAndAdvancePosition(data, LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE, arraySizeFieldPos); @@ -69,11 +71,10 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; } // Note that parentOffset is offset from node's head position. -/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentOffsetAndAdvancePosition( - BufferWithExtendableBuffer *const buffer, const int parentOffset, +/* static */ bool DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + BufferWithExtendableBuffer *const buffer, const int parentPos, const int basePos, int *const parentPosFieldPos) { - int offset = (parentOffset != NOT_A_DICT_POS) ? parentOffset : 0; - return writeDictOffset(buffer, offset, parentPosFieldPos); + return writeDictOffset(buffer, parentPos, basePos, parentPosFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition( @@ -106,13 +107,19 @@ const int DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE = 1; /* static */ bool DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( BufferWithExtendableBuffer *const buffer, const int childrenPosition, int *const childrenPositionFieldPos) { - int offset = (childrenPosition != NOT_A_DICT_POS) ? - childrenPosition - (*childrenPositionFieldPos) : 0; - return writeDictOffset(buffer, offset, childrenPositionFieldPos); + return writeDictOffset(buffer, childrenPosition, (*childrenPositionFieldPos), + childrenPositionFieldPos); } /* static */ bool DynamicPatriciaTrieWritingUtils::writeDictOffset( - BufferWithExtendableBuffer *const buffer, const int offset, int *const offsetFieldPos) { + BufferWithExtendableBuffer *const buffer, const int targetPos, const int basePos, + int *const offsetFieldPos) { + int offset = targetPos - basePos; + if (targetPos == NOT_A_DICT_POS) { + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID; + } else if (offset == 0) { + offset = DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET; + } if (offset > MAX_DICT_OFFSET_VALUE || offset < MIN_DICT_OFFSET_VALUE) { AKLOGI("offset cannot be written because the offset is too large or too small: %d", offset); diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h index 536ad3005..a37e9fb3d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h @@ -41,8 +41,8 @@ class DynamicPatriciaTrieWritingUtils { const DynamicPatriciaTrieReadingUtils::NodeFlags nodeFlags, int *const nodeFlagsFieldPos); - static bool writeParentOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, - const int parentPosition, int *const parentPosFieldPos); + static bool writeParentPosOffsetAndAdvancePosition(BufferWithExtendableBuffer *const buffer, + const int parentPosition, const int basePos, int *const parentPosFieldPos); static bool writeCodePointsAndAdvancePosition(BufferWithExtendableBuffer *const buffer, const int *const codePoints, const int codePointCount, int *const codePointFieldPos); @@ -67,8 +67,8 @@ class DynamicPatriciaTrieWritingUtils { static const int DICT_OFFSET_NEGATIVE_FLAG; static const int PROBABILITY_FIELD_SIZE; - static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int offset, - int *const offsetFieldPos); + static bool writeDictOffset(BufferWithExtendableBuffer *const buffer, const int targetPos, + const int basePos, int *const offsetFieldPos); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_UTILS_H */ diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 87901839f..e26b300a8 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -18,6 +18,7 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; +import android.util.Pair; import com.android.inputmethod.latin.makedict.CodePointUtils; import com.android.inputmethod.latin.makedict.DictEncoder; @@ -385,7 +386,6 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } - // TODO: Add large tests for BinaryDictionary.flushWithGC(). public void testFlushWithGCDictionary() { File dictFile = null; try { @@ -431,4 +431,177 @@ public class BinaryDictionaryTests extends AndroidTestCase { dictFile.delete(); } + + // TODO: Evaluate performance of GC + public void testAddBigramWordsAndFlashWithGC() { + final int wordCount = 100; + final int bigramCount = 1000; + final int codePointSetSize = 30; + // TODO: Use various seeds such as a current timestamp to make this test more random. + final int seed = 314159265; + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final ArrayList words = new ArrayList(); + // Test a word that isn't contained within the dictionary. + final Random random = new Random(seed); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final int[] unigramProbabilities = new int[wordCount]; + for (int i = 0; i < wordCount; ++i) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities[i] = unigramProbability; + binaryDictionary.addUnigramWord(word, unigramProbability); + } + + final int[][] probabilities = new int[wordCount][wordCount]; + + for (int i = 0; i < wordCount; ++i) { + for (int j = 0; j < wordCount; ++j) { + probabilities[i][j] = Dictionary.NOT_A_PROBABILITY; + } + } + + for (int i = 0; i < bigramCount; i++) { + final int word0Index = random.nextInt(wordCount); + final int word1Index = random.nextInt(wordCount); + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability( + unigramProbabilities[word1Index], bigramProbability); + binaryDictionary.addBigramWords(word0, word1, bigramProbability); + } + + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + for (int i = 0; i < words.size(); i++) { + for (int j = 0; j < words.size(); j++) { + assertEquals(probabilities[i][j], + binaryDictionary.getBigramProbability(words.get(i), words.get(j))); + } + } + dictFile.delete(); + } + + public void testRandomOperetionsAndFlashWithGC() { + final int flashWithGCIterationCount = 50; + final int operationCountInEachIteration = 200; + final int initialUnigramCount = 100; + final float addUnigramProb = 0.5f; + final float addBigramProb = 0.8f; + final float removeBigramProb = 0.2f; + final int codePointSetSize = 30; + final int seed = 141421356; + + final Random random = new Random(seed); + + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } catch (UnsupportedFormatException e) { + fail("UnsupportedFormatException while writing an initial dictionary : " + e); + } + + BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final ArrayList words = new ArrayList(); + final ArrayList> bigramWords = new ArrayList>(); + final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); + final HashMap unigramProbabilities = new HashMap(); + final HashMap, Integer> bigramProbabilities = + new HashMap, Integer>(); + for (int i = 0; i < initialUnigramCount; ++i) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + + for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { + binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { + // Add unigram. + if (random.nextFloat() < addUnigramProb) { + final String word = CodePointUtils.generateWord(random, codePointSet); + words.add(word); + final int unigramProbability = random.nextInt(0xFF); + unigramProbabilities.put(word, unigramProbability); + binaryDictionary.addUnigramWord(word, unigramProbability); + } + // Add bigram. + if (random.nextFloat() < addBigramProb && words.size() > 2) { + final int word0Index = random.nextInt(words.size()); + int word1Index = random.nextInt(words.size() - 1); + if (word0Index <= word1Index) { + word1Index++; + } + final String word0 = words.get(word0Index); + final String word1 = words.get(word1Index); + final int bigramProbability = random.nextInt(0xF); + final Pair bigram = new Pair(word0, word1); + bigramWords.add(bigram); + bigramProbabilities.put(bigram, bigramProbability); + binaryDictionary.addBigramWords(word0, word1, bigramProbability); + } + // Remove bigram. + if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { + final int bigramIndex = random.nextInt(bigramWords.size()); + final Pair bigram = bigramWords.get(bigramIndex); + bigramWords.remove(bigramIndex); + bigramProbabilities.remove(bigram); + binaryDictionary.removeBigramWords(bigram.first, bigram.second); + } + } + + // Test whether the all unigram operations are collectlly handled. + for (int i = 0; i < words.size(); i++) { + final String word = words.get(i); + final int unigramProbability = unigramProbabilities.get(word); + assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); + } + // Test whether the all bigram operations are collectlly handled. + for (int i = 0; i < bigramWords.size(); i++) { + final Pair bigram = bigramWords.get(i); + final int unigramProbability = unigramProbabilities.get(bigram.second); + final int probability; + if (bigramProbabilities.containsKey(bigram)) { + final int bigramProbability = bigramProbabilities.get(bigram); + probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + } else { + probability = Dictionary.NOT_A_PROBABILITY; + } + assertEquals(probability, + binaryDictionary.getBigramProbability(bigram.first, bigram.second)); + } + binaryDictionary.flushWithGC(); + binaryDictionary.close(); + } + + dictFile.delete(); + } }