Merge "Record reading error during traversing dictionaries."

This commit is contained in:
Keisuke Kuroyanagi 2014-02-20 10:20:21 +00:00 committed by Android (Google) Code Review
commit a7ee108fc5
7 changed files with 59 additions and 16 deletions

View file

@ -100,6 +100,8 @@ class DictionaryStructureWithBufferPolicy {
// starts iterating the dictionary. // starts iterating the dictionary.
virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0; virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
virtual bool isCorrupted() const = 0;
protected: protected:
DictionaryStructureWithBufferPolicy() {} DictionaryStructureWithBufferPolicy() {}

View file

@ -36,6 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
if (nextPos < 0 || nextPos >= mDictBufferSize) { if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
nextPos, mDictBufferSize); nextPos, mDictBufferSize);
mIsCorrupted = true;
ASSERT(false); ASSERT(false);
return; return;
} }
@ -45,6 +46,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
if (nextPos < 0 || nextPos >= mDictBufferSize) { if (nextPos < 0 || nextPos >= mDictBufferSize) {
AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
nextPos, mDictBufferSize, i, childCount); nextPos, mDictBufferSize, i, childCount);
mIsCorrupted = true;
ASSERT(false); ASSERT(false);
return; return;
} }
@ -239,7 +241,13 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); const int ptNodePos =
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
}
return ptNodePos;
} }
int PatriciaTriePolicy::getProbability(const int unigramProbability, int PatriciaTriePolicy::getProbability(const int unigramProbability,

View file

@ -46,7 +46,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize), mPtNodeArrayReader(mDictRoot, mDictBufferSize),
mTerminalPtNodePositionsForIteratingWords() {} mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
@ -134,6 +134,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getNextWordAndNextToken(const int token, int *const outCodePoints); int getNextWordAndNextToken(const int token, int *const outCodePoints);
bool isCorrupted() const {
return mIsCorrupted;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
@ -146,6 +150,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2ParticiaTrieNodeReader mPtNodeReader;
const Ver2PtNodeArrayReader mPtNodeArrayReader; const Ver2PtNodeArrayReader mPtNodeArrayReader;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const; DicNodeVector *const childDicNodes) const;

View file

@ -65,6 +65,10 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
readingHelper.readNextSiblingNode(ptNodeParams); readingHelper.readNextSiblingNode(ptNodeParams);
} }
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
}
} }
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
@ -72,15 +76,26 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodePos(ptNodePos); readingHelper.initWithPtNodePos(ptNodePos);
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability); maxCodePointCount, outCodePoints, outUnigramProbability);
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
}
return codePointCount;
} }
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); const int ptNodePos =
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
}
return ptNodePos;
} }
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
@ -265,7 +280,10 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
return; return;
} }
mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount); if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
}
} }
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
@ -273,7 +291,10 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return; return;
} }
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath); if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
AKLOGE("Cannot flush the dictionary to file with GC.");
mIsCorrupted = true;
}
} }
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {

View file

@ -55,7 +55,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mWritingHelper(mBuffers.get()), mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()), mUnigramCount(mHeaderPolicy->getUnigramCount()),
mBigramCount(mHeaderPolicy->getBigramCount()), mBigramCount(mHeaderPolicy->getBigramCount()),
mTerminalPtNodePositionsForIteratingWords() {}; mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
@ -116,6 +116,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getNextWordAndNextToken(const int token, int *const outCodePoints); int getNextWordAndNextToken(const int token, int *const outCodePoints);
bool isCorrupted() const {
return mIsCorrupted;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
@ -141,6 +145,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mUnigramCount; int mUnigramCount;
int mBigramCount; int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H

View file

@ -33,7 +33,7 @@
namespace latinime { namespace latinime {
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
const int unigramCount, const int bigramCount) const { const int unigramCount, const int bigramCount) const {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer headerBuffer(
@ -46,12 +46,12 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPat
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
"extendedRegionSize: %d", false, unigramCount, bigramCount, "extendedRegionSize: %d", false, unigramCount, bigramCount,
extendedRegionSize); extendedRegionSize);
return; return false;
} }
mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
} }
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const dictDirPath) { const char *const dictDirPath) {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
@ -59,15 +59,15 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
int unigramCount = 0; int unigramCount = 0;
int bigramCount = 0; int bigramCount = 0;
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) { if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
return; return false;
} }
BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) { unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
return; return false;
} }
dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); return dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
} }
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,

View file

@ -33,10 +33,12 @@ class Ver4PatriciaTrieWritingHelper {
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {} : mBuffers(buffers) {}
void writeToDictFile(const char *const dictDirPath, const int unigramCount, bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
const int bigramCount) const; const int bigramCount) const;
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath); // This method cannot be const because the original dictionary buffer will be updated to detect
// useless PtNodes during GC.
bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);