diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 784419586..38e8ff183 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -100,6 +100,8 @@ class DictionaryStructureWithBufferPolicy { // starts iterating the dictionary. virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; + virtual bool isCorrupted() const = 0; + protected: DictionaryStructureWithBufferPolicy() {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index fa5993090..212f2ef39 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -36,6 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", nextPos, mDictBufferSize); + mIsCorrupted = true; ASSERT(false); return; } @@ -45,6 +46,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo if (nextPos < 0 || nextPos >= mDictBufferSize) { AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", nextPos, mDictBufferSize, i, childCount); + mIsCorrupted = true; ASSERT(false); return; } @@ -239,7 +241,13 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); + const int ptNodePos = + readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); + } + return ptNodePos; } int PatriciaTriePolicy::getProbability(const int unigramProbability, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 8fbca2612..6a2345a05 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -46,7 +46,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeArrayReader(mDictRoot, mDictBufferSize), - mTerminalPtNodePositionsForIteratingWords() {} + mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -134,6 +134,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getNextWordAndNextToken(const int token, int *const outCodePoints); + bool isCorrupted() const { + return mIsCorrupted; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); @@ -146,6 +150,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2PtNodeArrayReader mPtNodeArrayReader; std::vector mTerminalPtNodePositionsForIteratingWords; + mutable bool mIsCorrupted; int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, DicNodeVector *const childDicNodes) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index efc29a0c3..b5d80be1d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -65,6 +65,10 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); readingHelper.readNextSiblingNode(ptNodeParams); } + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); + } } int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( @@ -72,15 +76,26 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int *const outUnigramProbability) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodePos(ptNodePos); - return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( + const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( maxCodePointCount, outCodePoints, outUnigramProbability); + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); + } + return codePointCount; } int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, const int length, const bool forceLowerCaseSearch) const { DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); readingHelper.initWithPtNodeArrayPos(getRootPosition()); - return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); + const int ptNodePos = + readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); + } + return ptNodePos; } int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, @@ -265,7 +280,10 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); return; } - mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount); + if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { + AKLOGE("Cannot flush the dictionary to file."); + mIsCorrupted = true; + } } void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { @@ -273,7 +291,10 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); return; } - mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath); + if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { + AKLOGE("Cannot flush the dictionary to file with GC."); + mIsCorrupted = true; + } } bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 692163058..7796e2ddc 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -55,7 +55,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), mBigramCount(mHeaderPolicy->getBigramCount()), - mTerminalPtNodePositionsForIteratingWords() {}; + mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -116,6 +116,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getNextWordAndNextToken(const int token, int *const outCodePoints); + bool isCorrupted() const { + return mIsCorrupted; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); @@ -141,6 +145,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int mUnigramCount; int mBigramCount; std::vector mTerminalPtNodePositionsForIteratingWords; + mutable bool mIsCorrupted; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp index acf099122..93053c38d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -33,7 +33,7 @@ namespace latinime { -void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, +bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const { const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); BufferWithExtendableBuffer headerBuffer( @@ -46,12 +46,12 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPat "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " "extendedRegionSize: %d", false, unigramCount, bigramCount, extendedRegionSize); - return; + return false; } - mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); + return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); } -void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, +bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath) { const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( @@ -59,15 +59,15 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr int unigramCount = 0; int bigramCount = 0; if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) { - return; + return false; } BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) { - return; + return false; } - dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); + return dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); } bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h index c3a155e0e..bb464ad28 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h @@ -33,10 +33,12 @@ class Ver4PatriciaTrieWritingHelper { Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) : mBuffers(buffers) {} - void writeToDictFile(const char *const dictDirPath, const int unigramCount, + bool writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const; - void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath); + // This method cannot be const because the original dictionary buffer will be updated to detect + // useless PtNodes during GC. + bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);