am 963d97af: Merge "Add length check for dict update operations."

* commit '963d97af6d83f62c48a4395caca8ac64972f8a57':
  Add length check for dict update operations.
main
Keisuke Kuroyanagi 2014-01-24 16:51:17 -08:00 committed by Android Git Automerger
commit 3dfdd307ce
2 changed files with 64 additions and 0 deletions

View File

@ -149,6 +149,15 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
mDictBuffer->getTailPosition());
return false;
}
if (length > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
return false;
}
if (shortcutLength > MAX_WORD_LENGTH) {
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d",
shortcutLength);
return false;
}
DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
@ -190,6 +199,11 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
mDictBuffer->getTailPosition());
return false;
}
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
"length0: %d, length1: %d", length0, length1);
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
@ -223,6 +237,11 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
mDictBuffer->getTailPosition());
return false;
}
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to remove the bigram to from the "
"dictionary. length0: %d, length1: %d", length0, length1);
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {

View File

@ -106,6 +106,51 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close();
}
/**
 * Runs the too-long-word insertion test using {@code FormatSpec.VERSION4} as the
 * dictionary format version.
 */
public void testAddTooLongWord() {
    final int formatVersion = FormatSpec.VERSION4;
    testAddTooLongWord(formatVersion);
}
/**
 * Checks how an updatable dictionary handles words at and just beyond
 * {@code Constants.DICTIONARY_MAX_WORD_LENGTH}.
 *
 * <p>The test creates an empty dictionary file, then adds (and later updates) a short word,
 * a word of exactly the maximum length, and a word one character longer than the maximum.
 * It also attempts to add a unigram whose shortcut target is the over-long word. Finally it
 * asserts that the valid words report the expected probabilities and that the over-long word
 * reports {@code BinaryDictionary.NOT_A_PROBABILITY}.
 *
 * @param formatVersion the dictionary format version passed to
 *         {@code createEmptyDictionaryAndGetFile}
 */
private void testAddTooLongWord(final int formatVersion) {
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    }
    final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

    // Build a word of exactly the maximum length, then one that is a single character longer.
    // A method-local buffer needs no synchronization, so StringBuilder is used.
    final StringBuilder stringBuilder = new StringBuilder();
    for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
        stringBuilder.append('a');
    }
    final String validLongWord = stringBuilder.toString();
    stringBuilder.append('a');
    final String invalidLongWord = stringBuilder.toString();

    final int probability = 100;
    addUnigramWord(binaryDictionary, "aaa", probability);
    addUnigramWord(binaryDictionary, validLongWord, probability);
    addUnigramWord(binaryDictionary, invalidLongWord, probability);
    // Too long short cut.
    binaryDictionary.addUnigramWord("a", probability, invalidLongWord,
            10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
            BinaryDictionary.NOT_A_VALID_TIMESTAMP);
    // Added after the rejected operations; its probability is asserted below.
    addUnigramWord(binaryDictionary, "abc", probability);

    final int updatedProbability = 200;
    // Update.
    addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
    addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
    addUnigramWord(binaryDictionary, "abc", updatedProbability);

    assertEquals(probability, binaryDictionary.getFrequency("aaa"));
    assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
    assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
            binaryDictionary.getFrequency(invalidLongWord));
    assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));

    // Close the dictionary before removing its backing file, as the other tests in this
    // class do.
    binaryDictionary.close();
    dictFile.delete();
}
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
final int probability) {
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,