Merge "Add boundary check for ver2 bigram reading." into lmp-dev

This commit is contained in:
Keisuke Kuroyanagi 2014-07-29 01:31:52 +00:00 committed by Android (Google) Code Review
commit 6850ea0ee5
8 changed files with 52 additions and 20 deletions

View file

@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy {
virtual void getNextBigram(int *const outBigramPos, int *const outProbability, virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const = 0; bool *const outHasNext, int *const pos) const = 0;
virtual void skipAllBigrams(int *const pos) const = 0; virtual bool skipAllBigrams(int *const pos) const = 0;
protected: protected:
DictionaryBigramsStructurePolicy() {} DictionaryBigramsStructurePolicy() {}

View file

@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability, void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const; bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const { bool skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries. // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
return true;
} }
bool addNewEntry(const int terminalId, const int newTargetTerminalId, bool addNewEntry(const int terminalId, const int newTargetTerminalId,

View file

@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
const BigramListReadWriteUtils::BigramFlags const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( /* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags, const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
int *const outTargetPtNodePos, int *const bigramEntryPos) { int *const outTargetPtNodePos, int *const bigramEntryPos) {
if (bufSize <= *bigramEntryPos) {
AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
"bigramEntryPos: %d.", bufSize, *bigramEntryPos);
return false;
}
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf, const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
bigramEntryPos); bigramEntryPos);
if (outBigramFlags) { if (outBigramFlags) {
@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags
if (outTargetPtNodePos) { if (outTargetPtNodePos) {
*outTargetPtNodePos = targetPos; *outTargetPtNodePos = targetPos;
} }
return true;
} }
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf, /* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
int *const bigramListPos) { const int bufSize, int *const bigramListPos) {
BigramFlags flags; BigramFlags flags;
do { do {
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */, if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
bigramListPos); 0 /* outTargetPtNodePos */, bigramListPos)) {
return false;
}
} while(hasNext(flags)); } while(hasNext(flags));
return true;
} }
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition( /* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(

View file

@ -30,8 +30,8 @@ class BigramListReadWriteUtils {
public: public:
typedef uint8_t BigramFlags; typedef uint8_t BigramFlags;
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf, static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos, const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
int *const bigramEntryPos); int *const bigramEntryPos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) { static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
@ -43,7 +43,8 @@ public:
} }
// Bigrams reading methods // Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos); static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize,
int *const bigramListPos);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);

View file

@ -27,27 +27,34 @@ namespace latinime {
class BigramListPolicy : public DictionaryBigramsStructurePolicy { class BigramListPolicy : public DictionaryBigramsStructurePolicy {
public: public:
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {} BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
: mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
~BigramListPolicy() {} ~BigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const { int *const pos) const {
BigramListReadWriteUtils::BigramFlags flags; BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags, if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
outBigramPos, pos); mBufSize, &flags, outBigramPos, pos)) {
AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
*outProbability = NOT_A_PROBABILITY;
*outHasNext = false;
return;
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags); *outHasNext = BigramListReadWriteUtils::hasNext(flags);
} }
void skipAllBigrams(int *const pos) const { bool skipAllBigrams(int *const pos) const {
BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos); return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
} }
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
const uint8_t *const mBigramsBuf; const uint8_t *const mBigramsBuf;
const int mBufSize;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_BIGRAM_LIST_POLICY_H #endif // LATINIME_BIGRAM_LIST_POLICY_H

View file

@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
mShortcutListPolicy.skipAllShortcuts(&pos); mShortcutListPolicy.skipAllShortcuts(&pos);
} }
if (PatriciaTrieReadingUtils::hasBigrams(flags)) { if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
mBigramListPolicy.skipAllBigrams(&pos); if (!mBigramListPolicy.skipAllBigrams(&pos)) {
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
pos);
mIsCorrupted = true;
ASSERT(false);
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
} }
} }
} else { } else {
@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
mShortcutListPolicy.skipAllShortcuts(&pos); mShortcutListPolicy.skipAllShortcuts(&pos);
} }
if (PatriciaTrieReadingUtils::hasBigrams(flags)) { if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
mBigramListPolicy.skipAllBigrams(&pos); if (!mBigramListPolicy.skipAllBigrams(&pos)) {
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
mIsCorrupted = true;
ASSERT(false);
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
} }
} }

View file

@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2), mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()), mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()), mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize), mPtNodeArrayReader(mDictRoot, mDictBufferSize),
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}

View file

@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability, void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const; bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const { bool skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries. // Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
return true;
} }
bool addNewEntry(const int terminalId, const int newTargetTerminalId, bool addNewEntry(const int terminalId, const int newTargetTerminalId,