am d9f450ef: Implement removeBigramWords() for DynamicPatriciaTrie.
* commit 'd9f450ef00f09a9eccfc677968b46e072267a5f2': Implement removeBigramWords() for DynamicPatriciaTrie.
main
commit
7c9542683d
|
@ -54,11 +54,13 @@ void DynamicBigramListPolicy::skipAllBigrams(int *const pos) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos) {
|
bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPos,
|
||||||
|
int *outBigramsCount) {
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*fromPos -= mBuffer->getOriginalBufferSize();
|
*fromPos -= mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
*outBigramsCount = 0;
|
||||||
BigramListReadWriteUtils::BigramFlags flags;
|
BigramListReadWriteUtils::BigramFlags flags;
|
||||||
do {
|
do {
|
||||||
// The buffer address can be changed after calling buffer writing methods.
|
// The buffer address can be changed after calling buffer writing methods.
|
||||||
|
@ -91,6 +93,7 @@ bool DynamicBigramListPolicy::copyAllBigrams(int *const fromPos, int *const toPo
|
||||||
toPos)) {
|
toPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
(*outBigramsCount)++;
|
||||||
} while(BigramListReadWriteUtils::hasNext(flags));
|
} while(BigramListReadWriteUtils::hasNext(flags));
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*fromPos += mBuffer->getOriginalBufferSize();
|
*fromPos += mBuffer->getOriginalBufferSize();
|
||||||
|
|
|
@ -45,8 +45,9 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
void skipAllBigrams(int *const pos) const;
|
void skipAllBigrams(int *const pos) const;
|
||||||
|
|
||||||
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
// Copy bigrams from the bigram list that starts at fromPos to toPos and advance these
|
||||||
// positions after bigram lists. This method skips invalid bigram entries.
|
// positions after bigram lists. This method skips invalid bigram entries and writes the valid
|
||||||
bool copyAllBigrams(int *const fromPos, int *const toPos);
|
// bigram entry count to outBigramsCount.
|
||||||
|
bool copyAllBigrams(int *const fromPos, int *const toPos, int *outBigramsCount);
|
||||||
|
|
||||||
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
bool addNewBigramEntryToBigramList(const int bigramPos, const int probability, int *const pos);
|
||||||
|
|
||||||
|
|
|
@ -125,11 +125,10 @@ bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const
|
||||||
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
nodeReader.fetchNodeInfoFromBuffer(word0Pos);
|
||||||
if (nodeReader.isDeleted() || nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement.
|
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||||
|
@ -193,13 +192,9 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
||||||
const int originalBigramListPos, const int originalShortcutListPos,
|
const int originalBigramListPos, const int originalShortcutListPos,
|
||||||
int *const writingPos) {
|
int *const writingPos) {
|
||||||
const int nodePos = *writingPos;
|
const int nodePos = *writingPos;
|
||||||
// Create node flags and write them.
|
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||||
const PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
// PtNode writing.
|
||||||
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, 0 /* nodeFlags */,
|
||||||
probability != NOT_A_PROBABILITY, originalShortcutListPos != NOT_A_DICT_POS,
|
|
||||||
originalBigramListPos != NOT_A_DICT_POS, codePointCount > 1,
|
|
||||||
3 /* childrenPositionFieldSize */);
|
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
|
||||||
writingPos)) {
|
writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -212,7 +207,7 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
||||||
// Write code points
|
// Write code points
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
|
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mBuffer, codePoints,
|
||||||
codePointCount, writingPos)) {
|
codePointCount, writingPos)) {
|
||||||
return false;;
|
return false;
|
||||||
}
|
}
|
||||||
// Write probability when the probability is a valid probability, which means this node is
|
// Write probability when the probability is a valid probability, which means this node is
|
||||||
// terminal.
|
// terminal.
|
||||||
|
@ -235,12 +230,25 @@ bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const boo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
// Copy bigram list when the originalBigramListPos is valid dictionary position.
|
||||||
|
int bigramCount = 0;
|
||||||
if (originalBigramListPos != NOT_A_DICT_POS) {
|
if (originalBigramListPos != NOT_A_DICT_POS) {
|
||||||
int fromPos = originalBigramListPos;
|
int fromPos = originalBigramListPos;
|
||||||
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos)) {
|
if (!mBigramPolicy->copyAllBigrams(&fromPos, writingPos, &bigramCount)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Create node flags and write them.
|
||||||
|
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
|
||||||
|
probability != NOT_A_PROBABILITY /* isTerminal */,
|
||||||
|
originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
|
||||||
|
bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
||||||
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
int flagsFieldPos = nodePos;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, nodeFlags,
|
||||||
|
&flagsFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -206,6 +206,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
final int bigramCount = 1000;
|
final int bigramCount = 1000;
|
||||||
final int codePointSetSize = 50;
|
final int codePointSetSize = 50;
|
||||||
final int seed = 11111;
|
final int seed = 11111;
|
||||||
|
|
||||||
File dictFile = null;
|
File dictFile = null;
|
||||||
try {
|
try {
|
||||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
@ -217,7 +218,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
final ArrayList<String> words = new ArrayList<String>();
|
final ArrayList<String> words = new ArrayList<String>();
|
||||||
// Test a word that isn't contained within the dictionary.
|
// Test a word that isn't contained within the dictionary.
|
||||||
final Random random = new Random(seed);
|
final Random random = new Random(seed);
|
||||||
|
@ -250,4 +250,53 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRemoveBigramWords() {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
final int unigramProbability = 100;
|
||||||
|
final int bigramProbability = 10;
|
||||||
|
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
|
||||||
|
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "bcc");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
binaryDictionary.removeBigramWords("abb", "aaa");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
binaryDictionary.removeBigramWords("abb", "bcc");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
// Test remove non-existing bigram operation.
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
binaryDictionary.removeBigramWords("bcc", "aaa");
|
||||||
|
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue