Merge "Implement ver4 dictionary bigram removing methods."
commit
cc356d04e7
|
@ -91,6 +91,33 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
|
return mBigramDictContent->copyBigramList(bigramListPos, writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Removes the bigram (terminalId -> targetTerminalId) from terminalId's bigram list.
// The entry is invalidated in place rather than unlinked: it is rewritten with the same
// probability and has-next flag, but with NOT_A_TERMINAL_ID as its target.
// Returns false if terminalId has no bigram list or no entry for targetTerminalId is found;
// otherwise returns the result of the write.
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
||||||
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
if (bigramListPos == NOT_A_DICT_POS) {
|
||||||
|
// Bigram list doesn't exist.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
|
||||||
|
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
||||||
|
// Bigram entry doesn't exist.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Read the entry back so that its probability and has-next flag can be preserved.
int readingPos = entryPosToUpdate;
|
||||||
|
bool hasNext = false;
|
||||||
|
int probability = NOT_A_PROBABILITY;
|
||||||
|
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
||||||
|
&originalTargetTerminalId, &readingPos);
|
||||||
|
if (targetTerminalId != originalTargetTerminalId) {
|
||||||
|
// The entry at that position has a different target, so the requested bigram doesn't exist.
// NOTE(review): getEntryPosToUpdate() is only partly visible here; presumably it can return
// the position of an entry other than the requested target -- confirm.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write back starting at the beginning of the same entry.
int writingPos = entryPosToUpdate;
|
||||||
|
// Remove the bigram entry by overwriting its target terminal ID; probability and has-next
// are written back unchanged.
|
||||||
|
return mBigramDictContent->writeBigramEntryAndAdvancePosition(probability, hasNext,
|
||||||
|
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, &writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
const int bigramListPos) const {
|
const int bigramListPos) const {
|
||||||
bool hasNext = true;
|
bool hasNext = true;
|
||||||
|
|
|
@ -42,6 +42,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
|
bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability,
|
||||||
bool *const outAddedNewEntry);
|
bool *const outAddedNewEntry);
|
||||||
|
|
||||||
|
bool removeEntry(const int terminalId, const int targetTerminalId);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
||||||
|
|
||||||
|
|
|
@ -31,9 +31,12 @@ void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbabili
|
||||||
if (outHasNext) {
|
if (outHasNext) {
|
||||||
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
||||||
}
|
}
|
||||||
if (outTargetTerminalId) {
|
const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
*outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
||||||
|
if (outTargetTerminalId) {
|
||||||
|
*outTargetTerminalId =
|
||||||
|
(targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
|
||||||
|
Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,7 +48,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId,
|
const int targetTerminalIdToWrite =
|
||||||
|
(targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||||
|
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
|
||||||
|
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,10 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||||
|
// The maximum unsigned value representable in BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE bytes is
|
||||||
|
// used to represent an invalid terminal ID in bigram lists.
|
||||||
|
const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
|
||||||
|
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
|
||||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||||
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
||||||
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
|
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
|
||||||
|
|
|
@ -47,6 +47,7 @@ class Ver4DictConstants {
|
||||||
|
|
||||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||||
|
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
|
||||||
static const int BIGRAM_PROBABILITY_MASK;
|
static const int BIGRAM_PROBABILITY_MASK;
|
||||||
static const int BIGRAM_HAS_NEXT_MASK;
|
static const int BIGRAM_HAS_NEXT_MASK;
|
||||||
|
|
||||||
|
|
|
@ -192,8 +192,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
|
|
||||||
// Side-by-side diff rows: in each pair the first line is the old version and the second is the
// new one. The old body was a stub (TODO + "return false;"). The new body removes the bigram
// from the source PtNode to the target PtNode by passing both terminal IDs to
// mBigramPolicy->removeEntry() and returning its result.
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||||
// TODO: Implement.
|
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
|
||||||
return false;
|
targetPtNodeParam->getTerminalId());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -188,9 +188,32 @@ bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int le
|
||||||
|
|
||||||
// Side-by-side diff rows: where two code lines appear in one row, the first is the old stub
// (TODO + "return false;") and the second is the new implementation.
//
// New implementation: removes the bigram word0 -> word1 from the dictionary.
// Returns false without modifying anything when the dictionary is not updatable, when the
// dictionary buffer's tail position has reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS, or
// when either word has no terminal PtNode position. Otherwise delegates to
// mUpdatingHelper.removeBigramWords(); on success mBigramCount is decremented and true is
// returned.
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
||||||
const int *const word1, const int length1) {
|
const int *const word1, const int length1) {
|
||||||
// TODO: Implement.
|
if (!mBuffers.get()->isUpdatable()) {
|
||||||
|
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||||
|
mDictBuffer.getTailPosition());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Both words must already exist as terminals; lookups are case-sensitive here
// (forceLowerCaseSearch is false).
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (word1Pos == NOT_A_DICT_POS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Only adjust the bigram counter when the helper reports that the removal succeeded.
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||||
|
mBigramCount--;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
// TODO: Implement.
|
// TODO: Implement.
|
||||||
|
|
|
@ -246,4 +246,56 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
|
||||||
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises BinaryDictionary.removeBigramWords() on an updatable ver4 dictionary.
// Steps: write a dictionary built from a new PtNodeArray with Ver4DictEncoder, open it as an
// updatable BinaryDictionary, add three unigrams and four bigrams, then check that
// isValidBigram() flips to false after each removal and back to true after a re-add.
// The last three removeBigramWords() calls are not followed by any assertion.
public void testRemoveBigramWords() {
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
|
// Write failures are only logged; the test keeps going and relies on the
// isValidDictionary() assertion below.
try {
|
||||||
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
|
} catch (IOException e) {
|
||||||
|
Log.e(TAG, "IOException while writing dictionary", e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
Log.e(TAG, "Unsupported format", e);
|
||||||
|
}
|
||||||
|
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
|
||||||
|
final int unigramProbability = 100;
|
||||||
|
final int bigramProbability = 10;
|
||||||
|
// Set up three words and four bigrams between them.
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("abb", unigramProbability);
|
||||||
|
binaryDictionary.addUnigramWord("bcc", unigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
|
||||||
|
binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
|
||||||
|
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
|
// Remove a bigram, then re-add it to check that a removed bigram can be added again.
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
|
||||||
|
assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
|
||||||
|
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "bcc");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
|
||||||
|
binaryDictionary.removeBigramWords("abb", "aaa");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
|
||||||
|
binaryDictionary.removeBigramWords("abb", "bcc");
|
||||||
|
assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
|
||||||
|
|
||||||
|
// Removes the re-added ("aaa", "abb") bigram; no assertion follows this call.
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
// Test remove non-existing bigram operation: ("aaa", "abb") was removed just above and
// ("bcc", "aaa") was never added. Results are not asserted; presumably this only checks
// that the calls complete without crashing.
|
||||||
|
binaryDictionary.removeBigramWords("aaa", "abb");
|
||||||
|
binaryDictionary.removeBigramWords("bcc", "aaa");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue