Implement removeBigramWords() for DynamicPatriciaTrie.

Bug: 6669677
Change-Id: I99cb517511b4c968b8bf937baab5d7f42b0f534e
This commit is contained in:
Keisuke Kuroyanagi 2013-09-17 17:49:22 +09:00
parent 7e51989b3f
commit d9f450ef00
4 changed files with 77 additions and 16 deletions

View file

@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
}
}
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
int *outBigramsCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize();
}
*outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags;
do {
// The buffer address can be changed after calling buffer writing methods.
@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
toPos)) {
return false;
}
(*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags));
if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize();

View file

@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
// positions after bigram lists. This method skips invalid bigram entries.
bool copyAllBigrams(int *const fromPos, int *const toPos);
// positions after bigram lists. This method skips invalid bigram entries and write the valid
// bigram entry count to outBigramsCount.
bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);

View file

@ -125,11 +125,10 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false;
}
// TODO: Implement.
return false;
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
}
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@ -193,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int originalBigramListPos, const int originalShortcutListPos,
int *const writingPos) {
const int nodePos = *writingPos;
// Create node flags and write them.
const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
3 /* childrenPositionFieldSize */);
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
writingPos)) {
return false;
}
@ -212,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
// Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
codePointCount, writingPos)) {
return false;;
return false;
}
// Write probability when the probability is a valid probability, which means this node is
// terminal.
@ -235,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
}
}
// Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos;
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
return false;
}
}
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
probability != NOT_A_PROBABILITY /* isTerminal */,
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true;
}

View file

@ -206,6 +206,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramCount = 1000;
final int codePointSetSize = 50;
final int seed = 11111;
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
@ -217,7 +218,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final ArrayList<String> words = new ArrayList<String>();
// Test a word that isn't contained within the dictionary.
final Random random = new Random(seed);
@ -250,4 +250,53 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete();
}
public void testRemoveBigramWords() {
File dictFile = null;
try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
} catch (UnsupportedFormatException e) {
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
}
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int unigramProbability = 100;
final int bigramProbability = 10;
binaryDictionary.addUnigramWord("aaa", unigramProbability);
binaryDictionary.addUnigramWord("abb", unigramProbability);
binaryDictionary.addUnigramWord("bcc", unigramProbability);
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
binaryDictionary.removeBigramWords("aaa", "abb");
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
binaryDictionary.removeBigramWords("aaa", "bcc");
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
binaryDictionary.removeBigramWords("abb", "aaa");
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
binaryDictionary.removeBigramWords("abb", "bcc");
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
binaryDictionary.removeBigramWords("aaa", "abb");
// Test remove non-existing bigram operation.
binaryDictionary.removeBigramWords("aaa", "abb");
binaryDictionary.removeBigramWords("bcc", "aaa");
dictFile.delete();
}
}