am 54099574: Merge "Remove unigram for v402 with historical info."

* commit '540995744080713b65630e02b62835deb8c8bdf3':
  Remove unigram for v402 with historical info.
main
Keisuke Kuroyanagi 2014-08-15 06:59:35 +00:00 committed by Android Git Automerger
commit 2444dd2974
5 changed files with 64 additions and 4 deletions

View File

@ -425,6 +425,18 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
return true; return true;
} }
bool Ver4PatriciaTrieNodeWriter::suppressUnigramEntry(const PtNodeParams *const ptNodeParams) {
if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
// Require historical info to suppress unigram entry.
return false;
}
const HistoricalInfo suppressedHistorycalInfo(0 /* timestamp */, 0 /* level */, 0 /* count */);
const ProbabilityEntry probabilityEntryToWrite =
ProbabilityEntry().createEntryWithUpdatedHistoricalInfo(&suppressedHistorycalInfo);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
ptNodeParams->getTerminalId(), &probabilityEntryToWrite);
}
} // namespace v402 } // namespace v402
} // namespace backward } // namespace backward
} // namespace latinime } // namespace latinime

View File

@ -111,6 +111,11 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams); bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
// Suppresses the unigram entry of the given PtNode so that the word is not used for generating
// suggestions. Suppression is recorded through the entry's historical info, so this method only
// works for dictionaries with historical info and returns false for other dictionaries.
// Suppressed entries are still included in the unigram count; they will be removed from the
// dictionary during GC.
bool suppressUnigramEntry(const PtNodeParams *const ptNodeParams);
private: private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);

View File

@ -258,6 +258,20 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
} }
} }
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
return false;
}
const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (ptNodePos == NOT_A_DICT_POS) {
return false;
}
const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
}
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty) { const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {

View File

@ -108,10 +108,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool addUnigramEntry(const int *const word, const int length, bool addUnigramEntry(const int *const word, const int length,
const UnigramProperty *const unigramProperty); const UnigramProperty *const unigramProperty);
bool removeUnigramEntry(const int *const word, const int length) { bool removeUnigramEntry(const int *const word, const int length);
// Removing unigram entry is not supported.
return false;
}
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const BigramProperty *const bigramProperty); const BigramProperty *const bigramProperty);

View File

@ -689,4 +689,36 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
binaryDictionary.close(); binaryDictionary.close();
dictFile.delete(); dictFile.delete();
} }
// Runs the unigram removal scenario once for each entry of DICT_FORMAT_VERSIONS.
public void testRemoveUnigrams() {
    for (final int version : DICT_FORMAT_VERSIONS) {
        testRemoveUnigrams(version);
    }
}
// Checks, for the given format version, that a word which became valid through repeated input
// is no longer reported as valid after its unigram entry has been removed.
private void testRemoveUnigrams(final int formatVersion) {
    setCurrentTimeForTestMode(mCurrentTime);
    File dictFile = null;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException exception) {
        fail("IOException while writing an initial dictionary : " + exception);
    }
    final String dictPath = dictFile.getAbsolutePath();
    final long dictLength = dictFile.length();
    final BinaryDictionary dictionary = new BinaryDictionary(dictPath, 0 /* offset */,
            dictLength, true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
            true /* isUpdatable */);

    final String word = "aaa";
    // A single input does not make the word valid yet.
    addUnigramWord(dictionary, word, Dictionary.NOT_A_PROBABILITY);
    assertFalse(dictionary.isValidWord(word));

    // After this many further inputs the word is expected to be valid.
    final int repeatedInputCount = 20;
    for (int remaining = repeatedInputCount; remaining > 0; remaining--) {
        addUnigramWord(dictionary, word, Dictionary.NOT_A_PROBABILITY);
    }
    assertTrue(dictionary.isValidWord(word));

    // Removal must succeed and the word must stop being valid.
    assertTrue(dictionary.removeUnigramEntry(word));
    assertFalse(dictionary.isValidWord(word));

    dictionary.close();
    dictFile.delete();
}
} }