From 7c4dcf1e918c2b9251e7aa907d991a3ab8764baf Mon Sep 17 00:00:00 2001
From: Keisuke Kuroyanagi
Date: Wed, 2 Oct 2013 18:01:47 +0900
Subject: [PATCH] Use probability table for decaying dictionaries.

Bug: 6669677
Change-Id: Ib5994a2e343dfcdaf23755e523f52846709b1c6c
---
Review notes (this area is not part of the commit message):
  * The patch text had been collapsed onto a few lines and all <...> tokens
    were lost. Line structure is restored here so that each hunk matches the
    old/new line counts in its @@ header and the diffstat below.
  * Restored tokens: <cmath>, <vector>, static_cast<int>, static_cast<float>,
    std::vector<int>. The context include in the first hunk is assumed to be
    <stdlib.h> -- TODO confirm against the pre-image.
  * ProbabilityTable::getProbability(): the upper bound check now uses >=, so
    an index equal to mTable.size() is rejected instead of being read.
  * decodeProbability(): the decoded table value is clamped with
    MAX_COMPUTED_PROBABILITY; clamping with MAX_ENCODED_PROBABILITY (the
    largest table index) would cap most table entries.
  * Context-line indentation is reconstructed; verify with
    `git apply --check`. The post-image blob ids in the index lines predate
    the two fixes above, and the From: header carries no e-mail address.

 .../utils/forgetting_curve_utils.cpp          | 36 +++++++++++-------
 .../dictionary/utils/forgetting_curve_utils.h | 24 +++++++++++--
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
index 62a19a5a6..4ff31ba0a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cmath>
 #include <stdlib.h>
 
 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
 // duration of the decay is approximately 66hours.
 const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
 
+const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
+
 /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
         const int encodedBigramProbability) {
     if (encodedUnigramProbability == NOT_A_PROBABILITY) {
         return NOT_A_PROBABILITY;
     } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
-        return backoff(decodeUnigramProbability(encodedUnigramProbability));
+        return backoff(decodeProbability(encodedUnigramProbability));
     } else {
-        const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
-        const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
+        const int unigramProbability = decodeProbability(encodedUnigramProbability);
+        const int bigramProbability = decodeProbability(encodedBigramProbability);
         return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
     }
 }
@@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
     }
 }
 
-/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
+/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
     const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
-    if (probability < 0) {
+    if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
         return NOT_A_PROBABILITY;
     } else {
-        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
-    }
-}
-
-/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
-    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
-    if (probability < 0) {
-        return NOT_A_PROBABILITY;
-    } else {
-        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
+        return min(sProbabilityTable.getProbability(encodedProbability), MAX_COMPUTED_PROBABILITY);
     }
 }
 
@@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
     }
 }
 
+ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
+    // Table entry is as follows:
+    // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
+    // Note that first MIN_VALID_ENCODED_PROBABILITY values are not used.
+    mTable.resize(MAX_ENCODED_PROBABILITY + 1);
+    for (int i = 0; i <= MAX_ENCODED_PROBABILITY; ++i) {
+        const int probability = static_cast<int>(powf(static_cast<float>(MAX_COMPUTED_PROBABILITY),
+                static_cast<float>(i) / static_cast<float>(MAX_ENCODED_PROBABILITY)));
+        mTable[i] = min(MAX_COMPUTED_PROBABILITY, max(0, probability));
+    }
+}
+
 } // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
index 281f76a9c..d666f22aa 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h
@@ -17,6 +17,8 @@
 #ifndef LATINIME_FORGETTING_CURVE_UTILS_H
 #define LATINIME_FORGETTING_CURVE_UTILS_H
 
+#include <vector>
+
 #include "defines.h"
 
 namespace latinime {
@@ -44,16 +46,32 @@ class ForgettingCurveUtils {
  private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
 
+    class ProbabilityTable {
+     public:
+        ProbabilityTable();
+
+        int getProbability(const int encodedProbability) const {
+            if (encodedProbability < 0 || encodedProbability >= static_cast<int>(mTable.size())) {
+                return NOT_A_PROBABILITY;
+            }
+            return mTable[encodedProbability];
+        }
+
+     private:
+        DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
+
+        std::vector<int> mTable;
+    };
+
     static const int MAX_COMPUTED_PROBABILITY;
     static const int MAX_ENCODED_PROBABILITY;
     static const int MIN_VALID_ENCODED_PROBABILITY;
     static const int ENCODED_PROBABILITY_STEP;
-
     static const float MIN_PROBABILITY_TO_DECAY;
 
-    static int decodeUnigramProbability(const int encodedProbability);
+    static const ProbabilityTable sProbabilityTable;
 
-    static int decodeBigramProbability(const int encodedProbability);
+    static int decodeProbability(const int encodedProbability);
 
     static int backoff(const int unigramProbability);
 };