am d9f450ef: Implement removeBigramWords() for DynamicPatriciaTrie.
* commit 'd9f450ef00f09a9eccfc677968b46e072267a5f2': Implement removeBigramWords() for DynamicPatriciaTrie.
This commit is contained in:
commit
7c9542683d
4 changed files with 77 additions and 16 deletions
|
@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
|
|||
}
|
||||
}
|
||||
|
||||
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
|
||||
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
|
||||
int *outBigramsCount) {
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
*fromPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
*outBigramsCount = 0;
|
||||
BigramListReadWriteUtils::BigramFlags flags;
|
||||
do {
|
||||
// The buffer address can be changed after calling buffer writing methods.
|
||||
|
@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
|
|||
toPos)) {
|
||||
return false;
|
||||
}
|
||||
(*outBigramsCount)++;
|
||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||
if (usesAdditionalBuffer) {
|
||||
*fromPos += mBuffer->getOriginalBufferSize();
|
||||
|
|
|
@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
void skipAllBigrams(int *const pos) const;
|
||||
|
||||
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
||||
// positions after bigram lists. This method skips invalid bigram entries.
|
||||
bool copyAllBigrams(int *const fromPos, int *const toPos);
|
||||
// positions after bigram lists. This method skips invalid bigram entries and writes the valid
|
||||
// bigram entry count to outBigramsCount.
|
||||
bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
|
||||
|
||||
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
||||
|
||||
|
|
|
@ -125,11 +125,10 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
|||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
||||
if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
// TODO: Implement.
|
||||
return false;
|
||||
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||
|
@ -193,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
|||
const int originalBigramListPos, const int originalShortcutListPos,
|
||||
int *const writingPos) {
|
||||
const int nodePos = *writingPos;
|
||||
// Create node flags and write them.
|
||||
const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
||||
probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
|
||||
originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
|
||||
3 /* childrenPositionFieldSize */);
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
|
||||
writingPos)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -212,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
|||
// Write code points
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
|
||||
codePointCount, writingPos)) {
|
||||
return false;;
|
||||
return false;
|
||||
}
|
||||
// Write probability when the probability is a valid probability, which means this node is
|
||||
// terminal.
|
||||
|
@ -235,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
|||
}
|
||||
}
|
||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||
int bigramCount = 0;
|
||||
if (originalBigramListPos != NOT_A_DICT_POS) {
|
||||
int fromPos = originalBigramListPos;
|
||||
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
|
||||
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
||||
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
int flagsFieldPos = nodePos;
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||
&flagsFieldPos)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -206,6 +206,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
final int bigramCount = 1000;
|
||||
final int codePointSetSize = 50;
|
||||
final int seed = 11111;
|
||||
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||
|
@ -217,7 +218,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final ArrayList<String> words = new ArrayList<String>();
|
||||
// Test a word that isn't contained within the dictionary.
|
||||
final Random random = new Random(seed);
|
||||
|
@ -250,4 +250,53 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
public void testRemoveBigramWords() {
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final int unigramProbability = 100;
|
||||
final int bigramProbability = 10;
|
||||
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||
|
||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||
|
||||
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||
|
||||
|
||||
binaryDictionary.removeBigramWords("aaa", "bcc");
|
||||
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||
binaryDictionary.removeBigramWords("abb", "aaa");
|
||||
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||
binaryDictionary.removeBigramWords("abb", "bcc");
|
||||
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||
|
||||
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||
// Test removing bigrams that do not exist.
|
||||
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||
binaryDictionary.removeBigramWords("bcc", "aaa");
|
||||
|
||||
dictFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue