am 54099574: Merge "Remove unigram for v402 with historical info."
* commit '540995744080713b65630e02b62835deb8c8bdf3': Remove unigram for v402 with historical info.main
commit
2444dd2974
|
@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
|
||||
if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||
// Require historical info to suppress unigram entry.
|
||||
return false;
|
||||
}
|
||||
const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
|
||||
const ProbabilityEntry probabilityEntryToWrite =
|
||||
ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
|
||||
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||
ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
|
||||
}
|
||||
|
||||
} // namespace v402
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
|
|
|
@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
|||
|
||||
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
|
||||
|
||||
// Suppresses the unigram so that the word is no longer used for generating suggestions. This
|
||||
// method can be used only for dictionaries with historical info. Suppressed entries are still
|
||||
// included in the unigram count; they will be removed from the dictionary during GC.
|
||||
bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
||||
|
||||
|
|
|
@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
|||
}
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
||||
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
const BigramProperty *const bigramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
|
|
|
@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
bool addUnigramEntry(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
// Inline stub: ignores |word| and |length| and always returns false.
// NOTE(review): a declaration with the same signature follows directly after this stub, so this
// body is presumably the pre-change side of the diff hunk rather than live code -- confirm before
// treating both as part of the same class definition.
bool removeUnigramEntry(const int *const word, const int length) {
|
||||
// Removing unigram entry is not supported.
|
||||
return false;
|
||||
}
|
||||
bool removeUnigramEntry(const int *const word, const int length);
|
||||
|
||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||
const BigramProperty *const bigramProperty);
|
||||
|
|
|
@ -689,4 +689,36 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
binaryDictionary.close();
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
public void testRemoveUnigrams() {
|
||||
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||
testRemoveUnigrams(formatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRemoveUnigrams(final int formatVersion) {
|
||||
final int unigramInputCount = 20;
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
}
|
||||
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
addUnigramWord(binaryDictionary, "aaa", Dictionary.NOT_A_PROBABILITY);
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
for (int i = 0; i < unigramInputCount; i++) {
|
||||
addUnigramWord(binaryDictionary, "aaa", Dictionary.NOT_A_PROBABILITY);
|
||||
}
|
||||
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||
assertTrue(binaryDictionary.removeUnigramEntry("aaa"));
|
||||
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||
|
||||
binaryDictionary.close();
|
||||
dictFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue