Merge "Add length check for dict update operations."
commit
963d97af6d
|
@ -149,6 +149,15 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (length > MAX_WORD_LENGTH) {
|
||||||
|
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (shortcutLength > MAX_WORD_LENGTH) {
|
||||||
|
AKLOGE("The shortcutTarget is too long to insert to the dictionary, length: %d",
|
||||||
|
shortcutLength);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
DynamicPtReadingHelper readingHelper(mDictBuffer, &mNodeReader);
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
|
@ -190,6 +199,11 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
|
||||||
|
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
|
||||||
|
"length0: %d, length1: %d", length0, length1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
@ -223,6 +237,11 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
|
||||||
|
AKLOGE("Either src word or target word is too long to remove the bigram to from the "
|
||||||
|
"dictionary. length0: %d, length1: %d", length0, length1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
|
|
@ -106,6 +106,51 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAddTooLongWord() {
|
||||||
|
testAddTooLongWord(FormatSpec.VERSION4);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAddTooLongWord(final int formatVersion) {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
final StringBuffer stringBuilder = new StringBuffer();
|
||||||
|
for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
|
||||||
|
stringBuilder.append('a');
|
||||||
|
}
|
||||||
|
final String validLongWord = stringBuilder.toString();
|
||||||
|
stringBuilder.append('a');
|
||||||
|
final String invalidLongWord = stringBuilder.toString();
|
||||||
|
final int probability = 100;
|
||||||
|
addUnigramWord(binaryDictionary, "aaa", probability);
|
||||||
|
addUnigramWord(binaryDictionary, validLongWord, probability);
|
||||||
|
addUnigramWord(binaryDictionary, invalidLongWord, probability);
|
||||||
|
// Too long short cut.
|
||||||
|
binaryDictionary.addUnigramWord("a", probability, invalidLongWord,
|
||||||
|
10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
|
||||||
|
BinaryDictionary.NOT_A_VALID_TIMESTAMP);
|
||||||
|
addUnigramWord(binaryDictionary, "abc", probability);
|
||||||
|
final int updatedProbability = 200;
|
||||||
|
// Update.
|
||||||
|
addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
|
||||||
|
addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
|
||||||
|
addUnigramWord(binaryDictionary, "abc", updatedProbability);
|
||||||
|
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
|
||||||
|
assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
|
||||||
|
assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
|
||||||
|
binaryDictionary.getFrequency(invalidLongWord));
|
||||||
|
assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
|
private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
|
||||||
final int probability) {
|
final int probability) {
|
||||||
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
|
binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
|
||||||
|
|
Loading…
Reference in New Issue