am d9f450ef: Implement removeBigramWords() for DynamicPatriciaTrie.

* commit 'd9f450ef00f09a9eccfc677968b46e072267a5f2':
  Implement removeBigramWords() for DynamicPatriciaTrie.
main
Keisuke Kuroyanagi 2013-09-17 02:16:35 -07:00 committed by Android Git Automerger
commit 7c9542683d
4 changed files with 77 additions and 16 deletions

View File

@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
} }
} }
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) { bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
int *outBigramsCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*fromPos -= mBuffer->getOriginalBufferSize(); *fromPos -= mBuffer->getOriginalBufferSize();
} }
*outBigramsCount = 0;
BigramListReadWriteUtils::BigramFlags flags; BigramListReadWriteUtils::BigramFlags flags;
do { do {
// The buffer address can be changed after calling buffer writing methods. // The buffer address can be changed after calling buffer writing methods.
@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
toPos)) { toPos)) {
return false; return false;
} }
(*outBigramsCount)++;
} while(BigramListReadWriteUtils::hasNext(flags)); } while(BigramListReadWriteUtils::hasNext(flags));
if (usesAdditionalBuffer) { if (usesAdditionalBuffer) {
*fromPos += mBuffer->getOriginalBufferSize(); *fromPos += mBuffer->getOriginalBufferSize();

View File

@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
void skipAllBigrams(int *const pos) const; void skipAllBigrams(int *const pos) const;
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these // Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
// positions after bigram lists. This method skips invalid bigram entries. // positions after bigram lists. This method skips invalid bigram entries and writes the valid
bool copyAllBigrams(int *const fromPos, int *const toPos); // bigram entry count to outBigramsCount.
bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos); bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);

View File

@ -125,11 +125,10 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(word0Pos); nodeReader.fetchNodeInfoFromBuffer(word0Pos);
if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) { if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
return false; return false;
} }
// TODO: Implement. return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
return false;
} }
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
@ -193,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
const int originalBigramListPos, const int originalShortcutListPos, const int originalBigramListPos, const int originalShortcutListPos,
int *const writingPos) { int *const writingPos) {
const int nodePos = *writingPos; const int nodePos = *writingPos;
// Create node flags and write them. // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
const PatriciaTrieReadingUtils::NodeFlags nodeFlags = // PtNode writing.
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
3 /* childrenPositionFieldSize */);
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
writingPos)) { writingPos)) {
return false; return false;
} }
@ -212,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
// Write code points // Write code points
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints, if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
codePointCount, writingPos)) { codePointCount, writingPos)) {
return false;; return false;
} }
// Write probability when the probability is a valid probability, which means this node is // Write probability when the probability is a valid probability, which means this node is
// terminal. // terminal.
@ -235,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
} }
} }
// Copy bigram list when the originalBigramListPos is valid dictionary position. // Copy bigram list when the originalBigramListPos is valid dictionary position.
int bigramCount = 0;
if (originalBigramListPos != NOT_A_DICT_POS) { if (originalBigramListPos != NOT_A_DICT_POS) {
int fromPos = originalBigramListPos; int fromPos = originalBigramListPos;
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) { if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
return false; return false;
} }
} }
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
probability != NOT_A_PROBABILITY /* isTerminal */,
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE);
int flagsFieldPos = nodePos;
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
&flagsFieldPos)) {
return false;
}
return true; return true;
} }

View File

@ -206,6 +206,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final int bigramCount = 1000; final int bigramCount = 1000;
final int codePointSetSize = 50; final int codePointSetSize = 50;
final int seed = 11111; final int seed = 11111;
File dictFile = null; File dictFile = null;
try { try {
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
@ -217,7 +218,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final ArrayList<String> words = new ArrayList<String>(); final ArrayList<String> words = new ArrayList<String>();
// Test a word that isn't contained within the dictionary. // Test a word that isn't contained within the dictionary.
final Random random = new Random(seed); final Random random = new Random(seed);
@ -250,4 +250,53 @@ public class BinaryDictionaryTests extends AndroidTestCase {
dictFile.delete(); dictFile.delete();
} }
/**
 * Checks bigram removal on an updatable dictionary.
 *
 * Registers three unigrams and four bigrams between them, then verifies that:
 * - every added bigram is reported as valid;
 * - a removed bigram is no longer valid, and becomes valid again once re-added;
 * - removing a bigram that is not in the dictionary leaves it reported as invalid.
 */
public void testRemoveBigramWords() {
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
    } catch (UnsupportedFormatException e) {
        fail("UnsupportedFormatException while writing an initial dictionary : " + e);
    }
    BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

    final int unigramProbability = 100;
    final int bigramProbability = 10;

    // Populate the dictionary: three words and four bigrams among them.
    binaryDictionary.addUnigramWord("aaa", unigramProbability);
    binaryDictionary.addUnigramWord("abb", unigramProbability);
    binaryDictionary.addUnigramWord("bcc", unigramProbability);
    binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
    binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
    binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);

    assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));
    assertTrue(binaryDictionary.isValidBigram("aaa", "bcc"));
    assertTrue(binaryDictionary.isValidBigram("abb", "aaa"));
    assertTrue(binaryDictionary.isValidBigram("abb", "bcc"));

    // Remove a bigram, then add it back: it must become valid again.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
    binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
    assertTrue(binaryDictionary.isValidBigram("aaa", "abb"));

    // Remove the remaining bigrams one by one.
    binaryDictionary.removeBigramWords("aaa", "bcc");
    assertFalse(binaryDictionary.isValidBigram("aaa", "bcc"));
    binaryDictionary.removeBigramWords("abb", "aaa");
    assertFalse(binaryDictionary.isValidBigram("abb", "aaa"));
    binaryDictionary.removeBigramWords("abb", "bcc");
    assertFalse(binaryDictionary.isValidBigram("abb", "bcc"));

    // Remove the re-added bigram; removal must also work on an entry that was added twice.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));

    // Test remove non-existing bigram operation.
    // "aaa" -> "abb" was just removed; "bcc" -> "aaa" was never added.
    binaryDictionary.removeBigramWords("aaa", "abb");
    assertFalse(binaryDictionary.isValidBigram("aaa", "abb"));
    binaryDictionary.removeBigramWords("bcc", "aaa");
    assertFalse(binaryDictionary.isValidBigram("bcc", "aaa"));

    dictFile.delete();
}
} }