Merge "Record reading error during traversing dictionaries."
This commit is contained in:
commit
a7ee108fc5
7 changed files with 59 additions and 16 deletions
|
@ -100,6 +100,8 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
// starts iterating the dictionary.
|
// starts iterating the dictionary.
|
||||||
virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
|
virtual int getNextWordAndNextToken(const int token, int *const outCodePoints) = 0;
|
||||||
|
|
||||||
|
virtual bool isCorrupted() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryStructureWithBufferPolicy() {}
|
DictionaryStructureWithBufferPolicy() {}
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
|
||||||
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
||||||
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
|
AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
|
||||||
nextPos, mDictBufferSize);
|
nextPos, mDictBufferSize);
|
||||||
|
mIsCorrupted = true;
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -45,6 +46,7 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
|
||||||
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
if (nextPos < 0 || nextPos >= mDictBufferSize) {
|
||||||
AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
|
AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
|
||||||
nextPos, mDictBufferSize, i, childCount);
|
nextPos, mDictBufferSize, i, childCount);
|
||||||
|
mIsCorrupted = true;
|
||||||
ASSERT(false);
|
ASSERT(false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -239,7 +241,13 @@ int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
const int ptNodePos =
|
||||||
|
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
|
if (readingHelper.isError()) {
|
||||||
|
mIsCorrupted = true;
|
||||||
|
AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
|
||||||
|
}
|
||||||
|
return ptNodePos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
|
|
|
@ -46,7 +46,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
||||||
mTerminalPtNodePositionsForIteratingWords() {}
|
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -134,6 +134,10 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
||||||
|
|
||||||
|
bool isCorrupted() const {
|
||||||
|
return mIsCorrupted;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
||||||
|
|
||||||
|
@ -146,6 +150,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
||||||
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
||||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||||
|
mutable bool mIsCorrupted;
|
||||||
|
|
||||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
|
@ -65,6 +65,10 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||||
readingHelper.readNextSiblingNode(ptNodeParams);
|
readingHelper.readNextSiblingNode(ptNodeParams);
|
||||||
}
|
}
|
||||||
|
if (readingHelper.isError()) {
|
||||||
|
mIsCorrupted = true;
|
||||||
|
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
|
@ -72,15 +76,26 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodePos(ptNodePos);
|
readingHelper.initWithPtNodePos(ptNodePos);
|
||||||
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
maxCodePointCount, outCodePoints, outUnigramProbability);
|
maxCodePointCount, outCodePoints, outUnigramProbability);
|
||||||
|
if (readingHelper.isError()) {
|
||||||
|
mIsCorrupted = true;
|
||||||
|
AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
|
||||||
|
}
|
||||||
|
return codePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
const int ptNodePos =
|
||||||
|
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
|
if (readingHelper.isError()) {
|
||||||
|
mIsCorrupted = true;
|
||||||
|
AKLOGE("Dictionary reading error in getTerminalPtNodePositionOfWord().");
|
||||||
|
}
|
||||||
|
return ptNodePos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
|
@ -265,7 +280,10 @@ void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount);
|
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
|
||||||
|
AKLOGE("Cannot flush the dictionary to file.");
|
||||||
|
mIsCorrupted = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
|
@ -273,7 +291,10 @@ void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath);
|
if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
|
||||||
|
AKLOGE("Cannot flush the dictionary to file with GC.");
|
||||||
|
mIsCorrupted = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
|
|
|
@ -55,7 +55,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mWritingHelper(mBuffers.get()),
|
mWritingHelper(mBuffers.get()),
|
||||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy->getBigramCount()),
|
mBigramCount(mHeaderPolicy->getBigramCount()),
|
||||||
mTerminalPtNodePositionsForIteratingWords() {};
|
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -116,6 +116,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
||||||
|
|
||||||
|
bool isCorrupted() const {
|
||||||
|
return mIsCorrupted;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
||||||
|
|
||||||
|
@ -141,6 +145,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
int mUnigramCount;
|
int mUnigramCount;
|
||||||
int mBigramCount;
|
int mBigramCount;
|
||||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||||
|
mutable bool mIsCorrupted;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
||||||
const int unigramCount, const int bigramCount) const {
|
const int unigramCount, const int bigramCount) const {
|
||||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
|
@ -46,12 +46,12 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPat
|
||||||
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
|
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
|
||||||
"extendedRegionSize: %d", false, unigramCount, bigramCount,
|
"extendedRegionSize: %d", false, unigramCount, bigramCount,
|
||||||
extendedRegionSize);
|
extendedRegionSize);
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||||
const char *const dictDirPath) {
|
const char *const dictDirPath) {
|
||||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
||||||
|
@ -59,15 +59,15 @@ void Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
|
||||||
int unigramCount = 0;
|
int unigramCount = 0;
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
|
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
BufferWithExtendableBuffer headerBuffer(
|
BufferWithExtendableBuffer headerBuffer(
|
||||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||||
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
||||||
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
|
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
return dictBuffers.get()->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
|
|
|
@ -33,10 +33,12 @@ class Ver4PatriciaTrieWritingHelper {
|
||||||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||||
: mBuffers(buffers) {}
|
: mBuffers(buffers) {}
|
||||||
|
|
||||||
void writeToDictFile(const char *const dictDirPath, const int unigramCount,
|
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
|
||||||
const int bigramCount) const;
|
const int bigramCount) const;
|
||||||
|
|
||||||
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
|
// This method cannot be const because the original dictionary buffer will be updated to detect
|
||||||
|
// useless PtNodes during GC.
|
||||||
|
bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
||||||
|
|
Loading…
Reference in a new issue