diff --git a/common/src/com/android/inputmethod/latin/common/Constants.java b/common/src/com/android/inputmethod/latin/common/Constants.java index abc377a84..a860d3560 100644 --- a/common/src/com/android/inputmethod/latin/common/Constants.java +++ b/common/src/com/android/inputmethod/latin/common/Constants.java @@ -179,7 +179,7 @@ public final class Constants { // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify // MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions. - public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2; + public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3; // Key events coming any faster than this are long-presses. public static final int LONG_PRESS_MILLISECONDS = 200; diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index 0e67b4d5a..10b930e4f 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -275,7 +275,7 @@ static inline void showStackTrace() { #define MAX_POINTER_COUNT_G 2 // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported. 
-#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2 +#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3 #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ TypeName() = delete diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index a2a0f11b4..c93f31017 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; const char *const HeaderPolicy::DATE_KEY = "date"; const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] = - {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"}; + {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"}; const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] = - {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"}; -const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000}; + {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT", + "MAX_QUADGRAM_ENTRY_COUNT"}; +const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000}; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; // Historical info is information that is needed to support decaying such as timestamp, level and // count. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp index 29bc7f719..025ee9932 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.cpp @@ -19,12 +19,13 @@ namespace latinime { // Used to provide stable probabilities even if the user's input count is small. -const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2}; +const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1}; // Encoded backoff weights. -// Note that we give positive value for trigrams that means the weight is more than 1. +// Note that we give positive values for trigrams and quadgrams, which means the weight is more +// than 1. // TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight. -const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8}; +const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8}; // This value is used to remove too old entries from the dictionary. 
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS = diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h index b38047f49..644ae2ca7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/dynamic_language_model_probability_utils.h @@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils); - static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram."); + static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram."); static const int ASSUMED_MIN_COUNTS[]; static const int ENCODED_BACKOFF_WEIGHTS[]; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h index 7269913e8..5e443026e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/entry_counters.h @@ -27,7 +27,7 @@ namespace latinime { // Copyable but immutable class EntryCounts final { public: - EntryCounts() : mEntryCounts({{0, 0, 0}}) {} + EntryCounts() : mEntryCounts({{0, 0, 0, 0}}) {} explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters) : mEntryCounts(counters) {} diff --git a/native/jni/src/utils/ngram_utils.h b/native/jni/src/utils/ngram_utils.h index 6227812d4..fa85ba35f 100644 --- a/native/jni/src/utils/ngram_utils.h +++ b/native/jni/src/utils/ngram_utils.h @@ -25,6 +25,7 @@ enum class NgramType : int { Unigram = 0, Bigram = 1, Trigram = 2, + Quadgram = 3, NotANgramType = -1, };