am 54099574: Merge "Remove unigram for v402 with historical info."
* commit '540995744080713b65630e02b62835deb8c8bdf3': Remove unigram for v402 with historical info.
main
commit
2444dd2974
|
@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Suppresses the unigram of the given PtNode by overwriting the probability entry stored for
// its terminal id with an entry whose historical info is zeroed (timestamp 0, level 0, count 0).
// Returns false without writing anything when the header policy reports that words carry no
// historical info; otherwise returns the result of setProbabilityEntry().
// NOTE(review): the local name "suppressedHistorycalInfo" is misspelled ("Historical").
bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
|
||||||
|
if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
|
// Require historical info to suppress unigram entry.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
|
||||||
|
// The entry to write starts from a default-constructed ProbabilityEntry, not from the entry
// currently stored for this PtNode.
const ProbabilityEntry probabilityEntryToWrite =
|
||||||
|
ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
|
||||||
|
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||||
|
ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace v402
|
} // namespace v402
|
||||||
} // namespace backward
|
} // namespace backward
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
|
|
||||||
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
|
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
|
||||||
|
|
||||||
|
// Suppresses a unigram so that the word is not used for generating suggestions. So, this method can be used
|
||||||
|
// only for dictionaries with historical info. Also, suppressed entries are included in unigram
|
||||||
|
// count. They will be removed from the dictionary during GC.
|
||||||
|
bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
||||||
|
|
||||||
|
|
|
@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Removes a unigram by delegating to mNodeWriter.suppressUnigramEntry() for the word's terminal
// PtNode; this method itself performs no other write.
// word, length: forwarded to getTerminalPtNodePositionOfWord() to locate the terminal PtNode.
// Returns false (after logging a warning) when the buffers are not updatable, false when the
// lookup yields NOT_A_DICT_POS, and otherwise the result of suppressUnigramEntry().
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
|
||||||
|
if (!mBuffers->isUpdatable()) {
|
||||||
|
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// The lookup is done with forceLowerCaseSearch disabled.
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
|
||||||
|
false /* forceLowerCaseSearch */);
|
||||||
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
|
// No terminal PtNode position was found for the word, so there is nothing to suppress.
return false;
|
||||||
|
}
|
||||||
|
const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
||||||
|
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
|
||||||
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const BigramProperty *const bigramProperty) {
|
const BigramProperty *const bigramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
|
|
|
@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
bool addUnigramEntry(const int *const word, const int length,
|
bool addUnigramEntry(const int *const word, const int length,
|
||||||
const UnigramProperty *const unigramProperty);
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool removeUnigramEntry(const int *const word, const int length) {
|
bool removeUnigramEntry(const int *const word, const int length);
|
||||||
// Removing unigram entry is not supported.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const BigramProperty *const bigramProperty);
|
const BigramProperty *const bigramProperty);
|
||||||
|
|
|
@ -689,4 +689,36 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the unigram-removal scenario once for each entry of DICT_FORMAT_VERSIONS.
public void testRemoveUnigrams() {
|
||||||
|
for (final int formatVersion : DICT_FORMAT_VERSIONS) {
|
||||||
|
testRemoveUnigrams(formatVersion);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks removeUnigramEntry() against a freshly created, updatable dictionary of the given
// format version: "aaa" is not a valid word after one add, is valid after unigramInputCount
// further adds, and is not valid again once removeUnigramEntry("aaa") has returned true.
// The dictionary is closed and its backing file deleted at the end.
private void testRemoveUnigrams(final int formatVersion) {
|
||||||
|
final int unigramInputCount = 20;
|
||||||
|
setCurrentTimeForTestMode(mCurrentTime);
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
// A single add is expected to leave the word invalid (presumably because this decaying
// dictionary needs repeated input before a word counts as valid - confirm).
addUnigramWord(binaryDictionary, "aaa", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||||
|
for (int i = 0; i < unigramInputCount; i++) {
|
||||||
|
addUnigramWord(binaryDictionary, "aaa", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
}
|
||||||
|
assertTrue(binaryDictionary.isValidWord("aaa"));
|
||||||
|
// Removal must report success and the word must no longer be valid afterwards.
assertTrue(binaryDictionary.removeUnigramEntry("aaa"));
|
||||||
|
assertFalse(binaryDictionary.isValidWord("aaa"));
|
||||||
|
|
||||||
|
binaryDictionary.close();
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue