From 1085fef8d040a6788f2185e7b03ab6b6032f321d Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 20 Oct 2014 15:01:49 +0900 Subject: [PATCH] Change entry count limit. Unigram 10K, Bigram 30K, Trigram 30K. Change-Id: Ibd19c6a2b618499df1c70000bad7b47498187f0a --- .../latin/ExpandableBinaryDictionary.java | 7 ---- .../latin/makedict/DictionaryHeader.java | 5 ++- .../dictionary/header/header_policy.cpp | 8 ++-- .../dictionary/header/header_policy.h | 2 + .../latin/BinaryDictionaryDecayingTests.java | 38 ++++++++++++++----- .../latin/BinaryDictionaryTests.java | 32 +++++++++++++--- 6 files changed, 65 insertions(+), 27 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index d24f80a45..d58373ff6 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -64,9 +64,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { private static final int TIMEOUT_FOR_READ_OPS_IN_MILLISECONDS = 100; - private static final int DEFAULT_MAX_UNIGRAM_COUNT = 10000; - private static final int DEFAULT_MAX_BIGRAM_COUNT = 10000; - /** * The maximum length of a word in this dictionary. */ @@ -225,10 +222,6 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { attributeMap.put(DictionaryHeader.DICTIONARY_LOCALE_KEY, mLocale.toString()); attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); - attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, - String.valueOf(DEFAULT_MAX_UNIGRAM_COUNT)); - attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, - String.valueOf(DEFAULT_MAX_BIGRAM_COUNT)); return attributeMap; } diff --git a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java index fa7c2c417..644818ba6 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java +++ b/java/src/com/android/inputmethod/latin/makedict/DictionaryHeader.java @@ -40,8 +40,9 @@ public final class DictionaryHeader { public static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE"; public static final String FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; - public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; - public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; + public static final String MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT"; + public static final String MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT"; + public static final String MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT"; public static final String ATTRIBUTE_VALUE_TRUE = "1"; public static final String CODE_POINT_TABLE_KEY = "codePointTable"; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 4c4dfc578..8fb256c54 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -38,15 +38,17 @@ const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; -const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; -const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; +const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_ENTRY_COUNT"; +const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_ENTRY_COUNT"; +const char *const HeaderPolicy::MAX_TRIGRAM_COUNT_KEY = "MAX_TRIGRAM_ENTRY_COUNT"; const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3; const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000; -const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000; +const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 30000; +const int HeaderPolicy::DEFAULT_MAX_TRIGRAM_COUNT = 30000; // Used for logging. Question mark is used to indicate that the key is not found. void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index bc8eaded3..836bbe5a1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -253,11 +253,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY; static const char *const MAX_UNIGRAM_COUNT_KEY; static const char *const MAX_BIGRAM_COUNT_KEY; + static const char *const MAX_TRIGRAM_COUNT_KEY; static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; static const int DEFAULT_MAX_UNIGRAM_COUNT; static const int DEFAULT_MAX_BIGRAM_COUNT; + static const int DEFAULT_MAX_TRIGRAM_COUNT; const FormatUtils::FORMAT_VERSION mDictFormatVersion; const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java index fa70f9988..f9ae9b8e4 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java @@ -39,7 +39,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Locale; -import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -136,11 +135,18 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { private HashSet mDictFilesToBeDeleted = new HashSet<>(); private File createEmptyDictionaryAndGetFile(final int formatVersion) { + return createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + new HashMap()); + } + + private File createEmptyDictionaryWithAttributeMapAndGetFile(final int formatVersion, + final HashMap attributeMap) { if (formatVersion == FormatSpec.VERSION4 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING || formatVersion == FormatSpec.VERSION4_DEV) { try { - final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, + attributeMap); mDictFilesToBeDeleted.add(dictFile); return dictFile; } catch (final IOException e) { @@ -152,12 +158,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { return null; } - private File createEmptyVer4DictionaryAndGetFile(final int formatVersion) + private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, + final HashMap attributeMap) throws IOException { final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); FileUtils.deleteRecursively(file); - Map attributeMap = new HashMap<>(); attributeMap.put(DictionaryHeader.DICTIONARY_ID_KEY, DICTIONARY_ID); attributeMap.put(DictionaryHeader.DICTIONARY_VERSION_KEY, String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()))); @@ -388,7 +394,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } final int maxUnigramCount = Integer.parseInt( - binaryDictionary.getPropertyForGettingStats(BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); + binaryDictionary.getPropertyForGettingStats( + BinaryDictionary.MAX_UNIGRAM_COUNT_QUERY)); for (int i = 0; i < unigramTypedCount; i++) { final String word = words.get(random.nextInt(words.size())); onInputWord(binaryDictionary, word, true /* isValidWord */); @@ -476,6 +483,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void testAddManyBigramsToDecayingDict(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int unigramCount = 5000; final int bigramCount = 30000; final int bigramTypedCount = 100000; @@ -484,7 +497,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { final Random random = new Random(seed); setCurrentTimeForTestMode(mCurrentTime); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + attributeMap); final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); @@ -507,9 +521,6 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { bigrams.add(bigram); } - final int maxBigramCount = Integer.parseInt( - binaryDictionary.getPropertyForGettingStats( - BinaryDictionary.MAX_BIGRAM_COUNT_QUERY)); for (int i = 0; i < bigramTypedCount; ++i) { final Pair bigram = bigrams.get(random.nextInt(bigrams.size())); onInputWord(binaryDictionary, bigram.first, true /* isValidWord */); @@ -546,6 +557,12 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { } private void testOverflowBigrams(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int bigramCount = 20000; final int unigramCount = 1000; final int unigramTypedCount = 20; @@ -556,7 +573,8 @@ public class BinaryDictionaryDecayingTests extends AndroidTestCase { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); setCurrentTimeForTestMode(mCurrentTime); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributeMapAndGetFile(formatVersion, + attributeMap); final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 90dd4366c..a640a9835 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -23,6 +23,7 @@ import android.util.Pair; import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.makedict.CodePointUtils; +import com.android.inputmethod.latin.makedict.DictionaryHeader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.WeightedString; import com.android.inputmethod.latin.makedict.WordProperty; @@ -78,11 +79,18 @@ public class BinaryDictionaryTests extends AndroidTestCase { } private File createEmptyDictionaryAndGetFile(final int formatVersion) { + return createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + new HashMap()); + } + + private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion, + final HashMap attributeMap) { if (formatVersion == FormatSpec.VERSION4 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING || formatVersion == FormatSpec.VERSION4_DEV) { try { - final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, + attributeMap); mDictFilesToBeDeleted.add(dictFile); return dictFile; } catch (final IOException e) { @@ -94,12 +102,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { return null; } - private File createEmptyVer4DictionaryAndGetFile(final int formatVersion) throws IOException { + private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, + final HashMap attributeMap) throws IOException { final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); file.delete(); file.mkdir(); - Map attributeMap = new HashMap<>(); if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; @@ -669,6 +677,12 @@ public class BinaryDictionaryTests extends AndroidTestCase { } private void testRandomOperationsAndFlashWithGC(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int flashWithGCIterationCount = 50; final int operationCountInEachIteration = 200; final int initialUnigramCount = 100; @@ -679,7 +693,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + attributeMap); BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); final ArrayList words = new ArrayList<>(); @@ -815,13 +830,20 @@ public class BinaryDictionaryTests extends AndroidTestCase { } private void testUnigramAndBigramCount(final int formatVersion) { + final int maxUnigramCount = 5000; + final int maxBigramCount = 10000; + final HashMap attributeMap = new HashMap<>(); + attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); + attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); + final int flashWithGCIterationCount = 10; final int codePointSetSize = 50; final int unigramCountPerIteration = 1000; final int bigramCountPerIteration = 2000; final long seed = System.currentTimeMillis(); final Random random = new Random(seed); - final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); + final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(formatVersion, + attributeMap); final ArrayList words = new ArrayList<>(); final HashSet> bigrams = new HashSet<>();