From b368089dbfabb84d1af4ad76d331b7add849c33b Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 26 Feb 2014 19:40:18 +0900 Subject: [PATCH] Stop dividing unigram probability by 2 for backoff. Bug: 13197276 Change-Id: I8eaa24b6f710383a8d257e3ec28c37a1a1da8e31 --- .../dictionary/utils/forgetting_curve_utils.cpp | 12 ++++++------ .../dictionary/utils/forgetting_curve_utils.h | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp index c9bb1093c..d80ab77e9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp @@ -30,6 +30,7 @@ const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; +const int ForgettingCurveUtils::MULTIPLIER_TWO_IN_PROBABILITY_SCALE = 8; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; @@ -84,7 +85,9 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT } else if (bigramProbability == NOT_A_PROBABILITY) { return min(backoff(unigramProbability), MAX_COMPUTED_PROBABILITY); } else { - return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); + // TODO: Investigate better way to handle bigram probability. + return min(max(unigramProbability, bigramProbability + MULTIPLIER_TWO_IN_PROBABILITY_SCALE), + MAX_COMPUTED_PROBABILITY); } } @@ -137,11 +140,8 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT // See comments in ProbabilityUtils::backoff(). 
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else { - return max(unigramProbability - 8, 0); - } + // See TODO comments in ForgettingCurveUtils::getProbability(). + return unigramProbability; } /* static */ int ForgettingCurveUtils::getElapsedTimeStepCount(const int timestamp) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h index db250474f..8818cfe3e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h @@ -72,6 +72,7 @@ class ForgettingCurveUtils { std::vector<std::vector<std::vector<int> > > mTables; }; + static const int MULTIPLIER_TWO_IN_PROBABILITY_SCALE; static const int MAX_COMPUTED_PROBABILITY; static const int DECAY_INTERVAL_SECONDS;