Remove unigram for v402 with historical info.

Bug: 15531638
Change-Id: If1b73ac693e45a80df987ea16b2dece5597697e8
main
Keisuke Kuroyanagi 2014-08-15 15:47:53 +09:00
parent 82be1873f1
commit 8890b01550
5 changed files with 64 additions and 4 deletions

View File

@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true;
}
bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
// Require historical info to suppress unigram entry.
return false;
}
const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
const ProbabilityEntry probabilityEntryToWrite =
ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
}
} // namespace v402
} // namespace backward
} // namespace latinime

View File

@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
// Suppresses the unigram entry of the given PtNode so that the word is no longer used for
// generating suggestions. This only works for dictionaries with historical info; for any
// other dictionary the method returns false without writing anything. Suppressed entries are
// still included in the unigram count. They will be removed from the dictionary during GC.
bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);

View File

@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
}
}
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
return false;
}
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (ptNodePos == NOT_A_DICT_POS) {
return false;
}
const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {

View File

@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
// Inline stub: ignores both arguments and always reports failure by returning false.
bool removeUnigramEntry(const int *const word, const int length) {
// Removing unigram entry is not supported.
return false;
}
bool removeUnigramEntry(const int *const word, const int length);
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty);

View File

@ -689,4 +689,36 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.close();
dictFile.delete();
}
/**
 * Runs the unigram removal scenario once for every entry of DICT_FORMAT_VERSIONS.
 */
public void testRemoveUnigrams() {
    for (final int version : DICT_FORMAT_VERSIONS) {
        testRemoveUnigrams(version);
    }
}
/**
 * Checks that removing a unigram makes a previously valid word invalid again, using an empty
 * updatable dictionary created with the given format version.
 *
 * @param formatVersion the dictionary format version to create the test dictionary with.
 */
private void testRemoveUnigrams(final int formatVersion) {
    final int repeatedInputCount = 20;
    final String word = "aaa";
    setCurrentTimeForTestMode(mCurrentTime);

    File file = null;
    try {
        file = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException exception) {
        fail("IOException while writing an initial dictionary : " + exception);
    }
    final BinaryDictionary dictionary = new BinaryDictionary(file.getAbsolutePath(),
            0 /* offset */, file.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);

    // After a single input the word is expected not to be valid yet.
    addUnigramWord(dictionary, word, Dictionary.NOT_A_PROBABILITY);
    assertFalse(dictionary.isValidWord(word));

    // After repeated inputs the word is expected to be valid.
    for (int inputIndex = 0; inputIndex < repeatedInputCount; ++inputIndex) {
        addUnigramWord(dictionary, word, Dictionary.NOT_A_PROBABILITY);
    }
    assertTrue(dictionary.isValidWord(word));

    // Removal must succeed and leave the word invalid.
    assertTrue(dictionary.removeUnigramEntry(word));
    assertFalse(dictionary.isValidWord(word));

    dictionary.close();
    file.delete();
}
}