Merge "Add boundary check for ver2 bigram reading." into lmp-dev
This commit is contained in:
commit
6850ea0ee5
8 changed files with 52 additions and 20 deletions
|
@ -30,7 +30,7 @@ class DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
// Pure-virtual declaration; both sides of the diff are identical, so this commit does not
// touch it. The return type is void, so any result has to travel through the pointer
// parameters.
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
|
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const pos) const = 0;
|
bool *const outHasNext, int *const pos) const = 0;
|
||||||
// Pure-virtual declaration whose return type this commit changes: void on the old side,
// bool on the new side. The PatriciaTriePolicy call sites in this diff treat a false
// return as a failed skip (they log "Cannot skip bigrams" and set mIsCorrupted).
virtual void skipAllBigrams(int *const pos) const = 0;
|
virtual bool skipAllBigrams(int *const pos) const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryBigramsStructurePolicy() {}
|
DictionaryBigramsStructurePolicy() {}
|
||||||
|
|
|
@ -58,8 +58,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
// Declaration only; the body is not part of this diff. Both sides are identical, so the
// commit leaves it unchanged.
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||||
|
|
||||||
// Ver4 override. The new side returns bool, matching the changed declaration in
// DictionaryBigramsStructurePolicy, and always returns true because the body does no work
// and never reads or writes pos.
void skipAllBigrams(int *const pos) const {
|
bool skipAllBigrams(int *const pos) const {
|
||||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
|
|
|
@ -38,9 +38,14 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
|
||||||
// Out-of-class definition of a static constant; identical on both sides of the diff.
// NOTE(review): presumably the low four bits (0x0F) of a BigramFlags byte hold the
// probability -- confirm against getProbabilityFromFlags(), whose body is not shown here.
const BigramListReadWriteUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
|
|
||||||
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
/* static */ bool BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
const uint8_t *const bigramsBuf, const int bufSize, BigramFlags *const outBigramFlags,
|
||||||
int *const outTargetPtNodePos, int *const bigramEntryPos) {
|
int *const outTargetPtNodePos, int *const bigramEntryPos) {
|
||||||
|
if (bufSize <= *bigramEntryPos) {
|
||||||
|
AKLOGE("Read invalid pos in getBigramEntryPropertiesAndAdvancePosition(). bufSize: %d, "
|
||||||
|
"bigramEntryPos: %d.", bufSize, *bigramEntryPos);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
|
const BigramFlags bigramFlags = ByteArrayUtils::readUint8AndAdvancePosition(bigramsBuf,
|
||||||
bigramEntryPos);
|
bigramEntryPos);
|
||||||
if (outBigramFlags) {
|
if (outBigramFlags) {
|
||||||
|
@ -51,15 +56,19 @@ const BigramListReadWriteUtils::BigramFlags
|
||||||
if (outTargetPtNodePos) {
|
if (outTargetPtNodePos) {
|
||||||
*outTargetPtNodePos = targetPos;
|
*outTargetPtNodePos = targetPos;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads bigram entries one after another, starting at *bigramListPos, until an entry's
// flags make hasNext() return false.
//
// Old side: returns void and ignores whatever the entry reader does.
// New side: takes the buffer size, forwards it to the entry reader, and returns false as
// soon as a read fails; returns true once the loop ends normally.
/* static */ void BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
/* static */ bool BigramListReadWriteUtils::skipExistingBigrams(const uint8_t *const bigramsBuf,
|
||||||
int *const bigramListPos) {
|
const int bufSize, int *const bigramListPos) {
|
||||||
BigramFlags flags;
|
BigramFlags flags;
|
||||||
// do/while: at least one entry is always read before hasNext() is consulted.
do {
|
do {
|
||||||
// Only the flags are requested; the target position out-parameter is passed as 0 (null).
getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, &flags, 0 /* outTargetPtNodePos */,
|
if (!getBigramEntryPropertiesAndAdvancePosition(bigramsBuf, bufSize, &flags,
|
||||||
bigramListPos);
|
0 /* outTargetPtNodePos */, bigramListPos)) {
|
||||||
|
// New side only: stop on a failed read so that hasNext() is never evaluated on flags
// that were not written by the reader.
return false;
|
||||||
|
}
|
||||||
} while(hasNext(flags));
|
} while(hasNext(flags));
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
|
/* static */ int BigramListReadWriteUtils::getBigramAddressAndAdvancePosition(
|
||||||
|
|
|
@ -30,8 +30,8 @@ class BigramListReadWriteUtils {
|
||||||
public:
|
public:
|
||||||
typedef uint8_t BigramFlags;
|
typedef uint8_t BigramFlags;
|
||||||
|
|
||||||
// Declaration as changed by this commit: the return type goes from void to bool and a
// bufSize parameter is inserted right after bigramsBuf. In the definition shown in this
// diff, the new side returns false (after logging) when bufSize <= *bigramEntryPos and
// returns true at the end of the function.
static void getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
static bool getBigramEntryPropertiesAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||||
BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
const int bufSize, BigramFlags *const outBigramFlags, int *const outTargetPtNodePos,
|
||||||
int *const bigramEntryPos);
|
int *const bigramEntryPos);
|
||||||
|
|
||||||
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
static AK_FORCE_INLINE int getProbabilityFromFlags(const BigramFlags flags) {
|
||||||
|
@ -43,7 +43,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bigrams reading methods
|
// Bigrams reading methods
|
||||||
// Declaration as changed by this commit: void -> bool, with a new bufSize parameter between
// bigramsBuf and bigramListPos. In the definition shown in this diff, the new side returns
// false when an entry read fails and true otherwise.
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
static bool skipExistingBigrams(const uint8_t *const bigramsBuf, const int bufSize,
|
||||||
|
int *const bigramListPos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||||
|
|
|
@ -27,27 +27,34 @@ namespace latinime {
|
||||||
|
|
||||||
// Bigram policy that forwards to the static helpers in BigramListReadWriteUtils, reading
// from the buffer pointer captured at construction. This commit adds a stored buffer size
// (mBufSize) and passes it to every helper call so reads can be bounds-checked.
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
// Old side: single-argument explicit constructor taking only the buffer pointer.
// New side: two-argument constructor that also records the buffer size.
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
|
BigramListPolicy(const uint8_t *const bigramsBuf, const int bufSize)
|
||||||
|
: mBigramsBuf(bigramsBuf), mBufSize(bufSize) {}
|
||||||
|
|
||||||
~BigramListPolicy() {}
|
~BigramListPolicy() {}
|
||||||
|
|
||||||
// Reads one bigram entry through the helper, which receives outBigramPos and pos, then
// derives *outProbability and *outHasNext from the entry's flags.
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||||
int *const pos) const {
|
int *const pos) const {
|
||||||
BigramListReadWriteUtils::BigramFlags flags;
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf, &flags,
|
if (!BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(mBigramsBuf,
|
||||||
outBigramPos, pos);
|
mBufSize, &flags, outBigramPos, pos)) {
|
||||||
|
// New side only: failed read. Log it, report "no probability" and "no next entry", and
// return. NOTE(review): this method does not assign *outBigramPos on this path.
AKLOGE("Cannot read bigram entry. mBufSize: %d, pos: %d. ", mBufSize, *pos);
|
||||||
|
*outProbability = NOT_A_PROBABILITY;
|
||||||
|
*outHasNext = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Old side: discards the helper's (void) result. New side: returns the helper's bool
// unchanged.
void skipAllBigrams(int *const pos) const {
|
bool skipAllBigrams(int *const pos) const {
|
||||||
BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
|
return BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, mBufSize, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
|
||||||
|
|
||||||
const uint8_t *const mBigramsBuf;
|
const uint8_t *const mBigramsBuf;
|
||||||
|
// New side only: the size passed in at construction, forwarded to the helpers as bufSize.
const int mBufSize;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_BIGRAM_LIST_POLICY_H
|
||||||
|
|
|
@ -223,7 +223,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
mShortcutListPolicy.skipAllShortcuts(&pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
mBigramListPolicy.skipAllBigrams(&pos);
|
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
|
||||||
|
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
|
||||||
|
pos);
|
||||||
|
mIsCorrupted = true;
|
||||||
|
ASSERT(false);
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -240,7 +247,13 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
mShortcutListPolicy.skipAllShortcuts(&pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
mBigramListPolicy.skipAllBigrams(&pos);
|
if (!mBigramListPolicy.skipAllBigrams(&pos)) {
|
||||||
|
AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
|
||||||
|
mIsCorrupted = true;
|
||||||
|
ASSERT(false);
|
||||||
|
*outUnigramProbability = NOT_A_PROBABILITY;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
|
mHeaderPolicy(mMmappedBuffer->getBuffer(), FormatUtils::VERSION_2),
|
||||||
mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
mDictRoot(mMmappedBuffer->getBuffer() + mHeaderPolicy.getSize()),
|
||||||
mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mDictBufferSize(mMmappedBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
|
||||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
mPtNodeArrayReader(mDictRoot, mDictBufferSize),
|
||||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
|
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
|
||||||
|
|
|
@ -40,8 +40,9 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
// Declaration only; the body is not part of this diff. Both sides are identical, so the
// commit leaves it unchanged.
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||||
|
|
||||||
// Ver4 override (second Ver4BigramListPolicy header touched by this commit). The new side
// returns bool and always returns true because the body does no work and never reads or
// writes pos.
void skipAllBigrams(int *const pos) const {
|
bool skipAllBigrams(int *const pos) const {
|
||||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
|
|
Loading…
Reference in a new issue