Change entry count limit.
Unigram 10K, Bigram 30K, Trigram 30K.
Change-Id: Ibd19c6a2b618499df1c70000bad7b47498187f0a
This commit is contained in:
parent
101cdca729
commit
1085fef8d0
6 changed files with 65 additions and 27 deletions
|
@ -64,9 +64,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
|
||||
private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100;
|
||||
|
||||
private static final int DEFAULT_MAX_UNIGRAM_COUNT = 10000;
|
||||
private static final int DEFAULT_MAX_BIGRAM_COUNT = 10000;
|
||||
|
||||
/**
|
||||
* The maximum length of a word in this dictionary.
|
||||
*/
|
||||
|
@ -225,10 +222,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString());
|
||||
attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
|
||||
String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY,
|
||||
String.valueOf(DEFAULT_MAX_UNIGRAM_COUNT));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY,
|
||||
String.valueOf(DEFAULT_MAX_BIGRAM_COUNT));
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,8 +40,9 @@ public final class DictionaryHeader {
|
|||
public static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
|
||||
public static final String FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
|
||||
"FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
|
||||
public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
|
||||
public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
|
||||
public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT";
|
||||
public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT";
|
||||
public static final String MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT";
|
||||
public static final String ATTRIBUTE_VALUE_TRUE = "1";
|
||||
public static final String CODE_POINT_TABLE_KEY = "codePointTable";
|
||||
|
||||
|
|
|
@ -38,15 +38,17 @@ const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
|
|||
const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
|
||||
"FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
|
||||
|
||||
const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT";
|
||||
const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT";
|
||||
const char *const HeaderPolicy::MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT";
|
||||
|
||||
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
|
||||
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
|
||||
const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
|
||||
|
||||
const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
|
||||
const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
|
||||
const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 30000;
|
||||
const int HeaderPolicy::DEFAULT_MAX_TRIGRAM_COUNT = 30000;
|
||||
|
||||
// Used for logging. Question mark is used to indicate that the key is not found.
|
||||
void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
|
||||
|
|
|
@ -253,11 +253,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
|
||||
static const char *const MAX_UNIGRAM_COUNT_KEY;
|
||||
static const char *const MAX_BIGRAM_COUNT_KEY;
|
||||
static const char *const MAX_TRIGRAM_COUNT_KEY;
|
||||
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
|
||||
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
|
||||
static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
|
||||
static const int DEFAULT_MAX_UNIGRAM_COUNT;
|
||||
static const int DEFAULT_MAX_BIGRAM_COUNT;
|
||||
static const int DEFAULT_MAX_TRIGRAM_COUNT;
|
||||
|
||||
const FormatUtils::FORMAT_VERSION mDictFormatVersion;
|
||||
const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
|
||||
|
|
|
@ -39,7 +39,6 @@ import java.util.ArrayList;
|
|||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
@ -136,11 +135,18 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();
|
||||
|
||||
private File createEmptyDictionaryAndGetFile(final int formatVersion) {
|
||||
return createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
new HashMap<String, String>());
|
||||
}
|
||||
|
||||
private File createEmptyDictionaryWithAttributeMapAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap) {
|
||||
if (formatVersion == FormatSpec.VERSION4
|
||||
|| formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|
||||
|| formatVersion == FormatSpec.VERSION4_DEV) {
|
||||
try {
|
||||
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
mDictFilesToBeDeleted.add(dictFile);
|
||||
return dictFile;
|
||||
} catch (final IOException e) {
|
||||
|
@ -152,12 +158,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
return null;
|
||||
}
|
||||
|
||||
private File createEmptyVer4DictionaryAndGetFile(final int formatVersion)
|
||||
private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap)
|
||||
throws IOException {
|
||||
final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
FileUtils.deleteRecursively(file);
|
||||
Map<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, DICTIONARY_ID);
|
||||
attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY,
|
||||
String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
|
||||
|
@ -388,7 +394,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
final int maxUnigramCount = Integer.parseInt(
|
||||
binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||
binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY));
|
||||
for (int i = 0; i < unigramTypedCount; i++) {
|
||||
final String word = words.get(random.nextInt(words.size()));
|
||||
onInputWord(binaryDictionary, word, true /* isValidWord */);
|
||||
|
@ -476,6 +483,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void testAddManyBigramsToDecayingDict(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int unigramCount = 5000;
|
||||
final int bigramCount = 30000;
|
||||
final int bigramTypedCount = 100000;
|
||||
|
@ -484,7 +497,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
final Random random = new Random(seed);
|
||||
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
|
@ -507,9 +521,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
bigrams.add(bigram);
|
||||
}
|
||||
|
||||
final int maxBigramCount = Integer.parseInt(
|
||||
binaryDictionary.getPropertyForGettingStats(
|
||||
BinaryDictionary.MAX_BIGRAM_COUNT_QUERY));
|
||||
for (int i = 0; i < bigramTypedCount; ++i) {
|
||||
final Pair<String, String> bigram = bigrams.get(random.nextInt(bigrams.size()));
|
||||
onInputWord(binaryDictionary, bigram.first, true /* isValidWord */);
|
||||
|
@ -546,6 +557,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void testOverflowBigrams(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int bigramCount = 20000;
|
||||
final int unigramCount = 1000;
|
||||
final int unigramTypedCount = 20;
|
||||
|
@ -556,7 +573,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
|||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
setCurrentTimeForTestMode(mCurrentTime);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@ import android.util.Pair;
|
|||
|
||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||
|
@ -78,11 +79,18 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private File createEmptyDictionaryAndGetFile(final int formatVersion) {
|
||||
return createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
|
||||
new HashMap<String, String>());
|
||||
}
|
||||
|
||||
private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap) {
|
||||
if (formatVersion == FormatSpec.VERSION4
|
||||
|| formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
|
||||
|| formatVersion == FormatSpec.VERSION4_DEV) {
|
||||
try {
|
||||
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
mDictFilesToBeDeleted.add(dictFile);
|
||||
return dictFile;
|
||||
} catch (final IOException e) {
|
||||
|
@ -94,12 +102,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
return null;
|
||||
}
|
||||
|
||||
private File createEmptyVer4DictionaryAndGetFile(final int formatVersion) throws IOException {
|
||||
private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
|
||||
final HashMap<String, String> attributeMap) throws IOException {
|
||||
final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
|
||||
getContext().getCacheDir());
|
||||
file.delete();
|
||||
file.mkdir();
|
||||
Map<String, String> attributeMap = new HashMap<>();
|
||||
if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
|
||||
Locale.ENGLISH, attributeMap)) {
|
||||
return file;
|
||||
|
@ -669,6 +677,12 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int flashWithGCIterationCount = 50;
|
||||
final int operationCountInEachIteration = 200;
|
||||
final int initialUnigramCount = 100;
|
||||
|
@ -679,7 +693,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
|
@ -815,13 +830,20 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void testUnigramAndBigramCount(final int formatVersion) {
|
||||
final int maxUnigramCount = 5000;
|
||||
final int maxBigramCount = 10000;
|
||||
final HashMap<String, String> attributeMap = new HashMap<>();
|
||||
attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
|
||||
attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
|
||||
|
||||
final int flashWithGCIterationCount = 10;
|
||||
final int codePointSetSize = 50;
|
||||
final int unigramCountPerIteration = 1000;
|
||||
final int bigramCountPerIteration = 2000;
|
||||
final long seed = System.currentTimeMillis();
|
||||
final Random random = new Random(seed);
|
||||
final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
|
||||
final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
|
||||
attributeMap);
|
||||
|
||||
final ArrayList<String> words = new ArrayList<>();
|
||||
final HashSet<Pair<String, String>> bigrams = new HashSet<>();
|
||||
|
|
Loading…
Reference in a new issue