am bcc6b52b: am 7d48233e: am 6850ea0e: Merge "Add boundary check for ver2 bigram reading." into lmp-dev
* commit 'bcc6b52b7458ce5c473e24245fee26348d0fcb92': Add boundary check for ver2 bigram reading.
This commit is contained in:
commit
08b98fcea4
8 changed files with 52 additions and 20 deletions
|
@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy {
|
|||
|
||||
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const pos) const = 0;
|
||||
virtual void skipAllBigrams(int *const pos) const = 0;
|
||||
virtual bool skipAllBigrams(int *const pos) const = 0;
|
||||
|
||||
protected:
|
||||
DictionaryBigramsStructurePolicy() {}
|
||||
|
|
|
@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||
|
||||
void skipAllBigrams(int *const pos) const {
|
||||
bool skipAllBigrams(int *const pos) const {
|
||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||
|
|
|
@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
|
|||
const BigramListReadWriteUtils::BigramFlags
|
||||
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||
|
||||
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
||||
/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||
const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
|
||||
int *const outTargetPtNodePos, int *const bigramEntryPos) {
|
||||
if (bufSize <= *bigramEntryPos) {
|
||||
AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
|
||||
"bigramEntryPos: %d.", bufSize, *bigramEntryPos);
|
||||
return false;
|
||||
}
|
||||
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
|
||||
bigramEntryPos);
|
||||
if (outBigramFlags) {
|
||||
|
@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags
|
|||
if (outTargetPtNodePos) {
|
||||
*outTargetPtNodePos = targetPos;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
||||
int *const bigramListPos) {
|
||||
/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
||||
const int bufSize, int *const bigramListPos) {
|
||||
BigramFlags flags;
|
||||
do {
|
||||
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
|
||||
bigramListPos);
|
||||
if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
|
||||
0 /* outTargetPtNodePos */, bigramListPos)) {
|
||||
return false;
|
||||
}
|
||||
} while(hasNext(flags));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
|
||||
|
|
|
@ -30,8 +30,8 @@ class BigramListReadWriteUtils {
|
|||
public:
|
||||
typedef uint8_t BigramFlags;
|
||||
|
||||
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
||||
static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||
const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
||||
int *const bigramEntryPos);
|
||||
|
||||
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
||||
|
@ -43,7 +43,8 @@ public:
|
|||
}
|
||||
|
||||
// Bigrams reading methods
|
||||
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
||||
static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize,
|
||||
int *const bigramListPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||
|
|
|
@ -27,27 +27,34 @@ namespace latinime {
|
|||
|
||||
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||
public:
|
||||
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
|
||||
BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
|
||||
: mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
|
||||
|
||||
~BigramListPolicy() {}
|
||||
|
||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||
int *const pos) const {
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
|
||||
outBigramPos, pos);
|
||||
if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
|
||||
mBufSize, &flags, outBigramPos, pos)) {
|
||||
AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
|
||||
*outProbability = NOT_A_PROBABILITY;
|
||||
*outHasNext = false;
|
||||
return;
|
||||
}
|
||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||
}
|
||||
|
||||
void skipAllBigrams(int *const pos) const {
|
||||
BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
|
||||
bool skipAllBigrams(int *const pos) const {
|
||||
return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
|
||||
|
||||
const uint8_t *const mBigramsBuf;
|
||||
const int mBufSize;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BIGRAM_LIST_POLICY_H
|
||||
|
|
|
@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
mBigramListPolicy.skipAllBigrams(&pos);
|
||||
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
|
||||
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
|
||||
pos);
|
||||
mIsCorrupted = true;
|
||||
ASSERT(false);
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
mBigramListPolicy.skipAllBigrams(&pos);
|
||||
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
|
||||
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
|
||||
mIsCorrupted = true;
|
||||
ASSERT(false);
|
||||
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
|
||||
mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
||||
mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||
mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
|
||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
|
||||
|
|
|
@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||
|
||||
void skipAllBigrams(int *const pos) const {
|
||||
bool skipAllBigrams(int *const pos) const {
|
||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||
|
|
Loading…
Reference in a new issue