Fix bugs and add a large test for the decaying dictionary.
- GC fails when the dictionary becomes empty. - Useless unigrams are sometimes not removed. Bug: 10197478 Change-Id: I8d1479c01efba61a81f03bc077da6bcb4797a940 main
parent
32cf6f85a2
commit
cfb018ba6d
|
@ -52,6 +52,10 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||||
|
@UsedForTesting
|
||||||
|
public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
|
||||||
|
@UsedForTesting
|
||||||
|
public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
|
||||||
|
|
||||||
private long mNativeDict;
|
private long mNativeDict;
|
||||||
private final Locale mLocale;
|
private final Locale mLocale;
|
||||||
|
|
|
@ -39,7 +39,7 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
|
||||||
isUselessPtNode = false;
|
isUselessPtNode = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mChildrenValue > 0) {
|
if (mChildrenValue > 0) {
|
||||||
|
|
|
@ -60,6 +60,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
bool onDescend(const int ptNodeArrayPos) {
|
bool onDescend(const int ptNodeArrayPos) {
|
||||||
mValueStack.push_back(0);
|
mValueStack.push_back(0);
|
||||||
|
mChildrenValue = 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,8 @@ namespace latinime {
|
||||||
// BinaryDictionaryDecayingTests.
|
// BinaryDictionaryDecayingTests.
|
||||||
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||||
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||||
|
const char *const DynamicPatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
|
||||||
|
const char *const DynamicPatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
|
||||||
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
|
const char *const DynamicPatriciaTriePolicy::SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY =
|
||||||
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
"SET_NEEDS_TO_DECAY_FOR_TESTING";
|
||||||
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
||||||
|
@ -355,6 +357,14 @@ void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const
|
||||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
} else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
||||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||||
|
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
||||||
|
snprintf(outResult, maxResultLength, "%d",
|
||||||
|
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_UNIGRAM_COUNT :
|
||||||
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
|
||||||
|
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
|
||||||
|
snprintf(outResult, maxResultLength, "%d",
|
||||||
|
mHeaderPolicy.isDecayingDict() ? ForgettingCurveUtils::MAX_BIGRAM_COUNT :
|
||||||
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE);
|
||||||
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
|
} else if (strncmp(query, SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY, maxResultLength) == 0) {
|
||||||
mNeedsToDecayForTesting = true;
|
mNeedsToDecayForTesting = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,6 +102,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
static const char *const UNIGRAM_COUNT_QUERY;
|
static const char *const UNIGRAM_COUNT_QUERY;
|
||||||
static const char *const BIGRAM_COUNT_QUERY;
|
static const char *const BIGRAM_COUNT_QUERY;
|
||||||
|
static const char *const MAX_UNIGRAM_COUNT_QUERY;
|
||||||
|
static const char *const MAX_BIGRAM_COUNT_QUERY;
|
||||||
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
static const char *const SET_NEEDS_TO_DECAY_FOR_TESTING_QUERY;
|
||||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
|
|
@ -93,6 +93,12 @@ bool DynamicPatriciaTrieReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreor
|
||||||
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (isEnd()) {
|
||||||
|
// Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
|
||||||
|
if (!listener->onReadingPtNodeArrayTail()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
pushReadingStateToStack();
|
pushReadingStateToStack();
|
||||||
while (!isEnd()) {
|
while (!isEnd()) {
|
||||||
if (alreadyVisitedAllPtNodesInArray) {
|
if (alreadyVisitedAllPtNodesInArray) {
|
||||||
|
|
|
@ -279,9 +279,11 @@ class DynamicPatriciaTrieReadingHelper {
|
||||||
} else {
|
} else {
|
||||||
mReadingState = mReadingStateStack.back();
|
mReadingState = mReadingStateStack.back();
|
||||||
mReadingStateStack.pop_back();
|
mReadingStateStack.pop_back();
|
||||||
|
if (!isEnd()) {
|
||||||
fetchPtNodeInfo();
|
fetchPtNodeInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_READING_HELPER_H */
|
||||||
|
|
|
@ -93,8 +93,7 @@ void ForgettingCurveUtils::TimeKeeper::setCurrentTime() {
|
||||||
for (int i = 0; i < decayIterationCount; ++i) {
|
for (int i = 0; i < decayIterationCount; ++i) {
|
||||||
const float currentRate = static_cast<float>(currentEncodedProbability)
|
const float currentRate = static_cast<float>(currentEncodedProbability)
|
||||||
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
|
/ static_cast<float>(MAX_ENCODED_PROBABILITY);
|
||||||
const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
|
const float thresholdToDecay = (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
|
||||||
+ (1.0f - MIN_PROBABILITY_TO_DECAY) * currentRate;
|
|
||||||
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
|
const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
|
||||||
if (thresholdToDecay < randValue) {
|
if (thresholdToDecay < randValue) {
|
||||||
currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
|
currentEncodedProbability = max(currentEncodedProbability - ENCODED_PROBABILITY_STEP,
|
||||||
|
|
|
@ -19,13 +19,16 @@ package com.android.inputmethod.latin;
|
||||||
import android.test.AndroidTestCase;
|
import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
@LargeTest
|
@LargeTest
|
||||||
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
@ -179,4 +182,55 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
dictFile.delete();
|
dictFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAddManyUnigramsToDecayingDict() {
|
||||||
|
final int unigramCount = 30000;
|
||||||
|
final int unigramTypedCount = 100000;
|
||||||
|
final int codePointSetSize = 50;
|
||||||
|
final long seed = System.currentTimeMillis();
|
||||||
|
final Random random = new Random(seed);
|
||||||
|
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||||
|
final ArrayList<String> words = new ArrayList<String>();
|
||||||
|
|
||||||
|
for (int i = 0; i < unigramCount; i++) {
|
||||||
|
final String word = CodePointUtils.generateWord(random, codePointSet);
|
||||||
|
words.add(word);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int maxUnigramCount = Integer.parseInt(
|
||||||
|
binaryDictionary.getPropertyForTests(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||||
|
for (int i = 0; i < unigramTypedCount; i++) {
|
||||||
|
final String word = words.get(random.nextInt(words.size()));
|
||||||
|
binaryDictionary.addUnigramWord(word, DUMMY_PROBABILITY);
|
||||||
|
|
||||||
|
if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
|
final int unigramCountBeforeGC =
|
||||||
|
Integer.parseInt(binaryDictionary.getPropertyForTests(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
|
while (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
|
binaryDictionary.flushWithGC();
|
||||||
|
}
|
||||||
|
final int unigramCountAfterGC =
|
||||||
|
Integer.parseInt(binaryDictionary.getPropertyForTests(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY));
|
||||||
|
assertTrue(unigramCountBeforeGC > unigramCountAfterGC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) > 0);
|
||||||
|
assertTrue(Integer.parseInt(binaryDictionary.getPropertyForTests(
|
||||||
|
BinaryDictionary.UNIGRAM_COUNT_QUERY)) <= maxUnigramCount);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue