Merge "Add boundary check for ver2 bigram reading." into lmp-dev

This commit is contained in:
Keisuke Kuroyanagi 2014-07-29 01:31:52 +00:00 committed by Android (Google) Code Review
commit 6850ea0ee5
8 changed files with 52 additions and 20 deletions

View file

@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy {
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const = 0;
virtual void skipAllBigrams(int *const pos) const = 0;
virtual bool skipAllBigrams(int *const pos) const = 0;
protected:
DictionaryBigramsStructurePolicy() {}

View file

@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const {
bool skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
return true;
}
bool addNewEntry(const int terminalId, const int newTargetTerminalId,

View file

@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
int *const outTargetPtNodePos, int *const bigramEntryPos) {
if (bufSize <= *bigramEntryPos) {
AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
"bigramEntryPos: %d.", bufSize, *bigramEntryPos);
return false;
}
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
bigramEntryPos);
if (outBigramFlags) {
@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags
if (outTargetPtNodePos) {
*outTargetPtNodePos = targetPos;
}
return true;
}
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
int *const bigramListPos) {
/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
const int bufSize, int *const bigramListPos) {
BigramFlags flags;
do {
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
bigramListPos);
if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
0 /* outTargetPtNodePos */, bigramListPos)) {
return false;
}
} while(hasNext(flags));
return true;
}
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(

View file

@ -30,8 +30,8 @@ class BigramListReadWriteUtils {
public:
typedef uint8_t BigramFlags;
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
int *const bigramEntryPos);
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
@ -43,7 +43,8 @@ public:
}
// Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize,
int *const bigramListPos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);

View file

@ -27,27 +27,34 @@ namespace latinime {
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
: mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
~BigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const {
BigramListReadWriteUtils::BigramFlags flags;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
outBigramPos, pos);
if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
mBufSize, &flags, outBigramPos, pos)) {
AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
*outProbability = NOT_A_PROBABILITY;
*outHasNext = false;
return;
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
}
void skipAllBigrams(int *const pos) const {
BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
bool skipAllBigrams(int *const pos) const {
return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
const uint8_t *const mBigramsBuf;
const int mBufSize;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_POLICY_H

View file

@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
mShortcutListPolicy.skipAllShortcuts(&pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
mBigramListPolicy.skipAllBigrams(&pos);
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
pos);
mIsCorrupted = true;
ASSERT(false);
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
}
}
} else {
@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
mShortcutListPolicy.skipAllShortcuts(&pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
mBigramListPolicy.skipAllBigrams(&pos);
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
mIsCorrupted = true;
ASSERT(false);
*outUnigramProbability = NOT_A_PROBABILITY;
return 0;
}
}
}

View file

@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}

View file

@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const {
bool skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
return true;
}
bool addNewEntry(const int terminalId, const int newTargetTerminalId,