Merge "Use probability table for decaying dictionaries."

main
Keisuke Kuroyanagi 2013-10-02 09:42:38 +00:00 committed by Android (Google) Code Review
commit ba9dc0860f
2 changed files with 42 additions and 18 deletions

View File

@ -14,6 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
#include <cmath>
#include <stdlib.h> #include <stdlib.h>
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// duration of the decay is approximately 66hours. // duration of the decay is approximately 66hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) { const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) { if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} else if (encodedBigramProbability == NOT_A_PROBABILITY) { } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
return backoff(decodeUnigramProbability(encodedUnigramProbability)); return backoff(decodeProbability(encodedUnigramProbability));
} else { } else {
const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability); const int unigramProbability = decodeProbability(encodedUnigramProbability);
const int bigramProbability = decodeBigramProbability(encodedBigramProbability); const int bigramProbability = decodeProbability(encodedBigramProbability);
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
} }
} }
@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
} }
} }
/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) { /* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
if (probability < 0) { if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} else { } else {
return min(probability, MAX_ENCODED_PROBABILITY) * 8; return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
}
}
/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
if (probability < 0) {
return NOT_A_PROBABILITY;
} else {
return min(probability, MAX_ENCODED_PROBABILITY) * 8;
} }
} }
@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
} }
} }
// Builds the table that maps an encoded probability (the index) to a probability value.
// The entry at index i is MAX_COMPUTED_PROBABILITY raised to the power
// (i / MAX_ENCODED_PROBABILITY), truncated to an int and clamped to
// [0, MAX_COMPUTED_PROBABILITY]. The resulting entries are:
// 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
// Note that the first MIN_VALID_ENCODED_PROBABILITY entries are not used.
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
    const int entryCount = MAX_ENCODED_PROBABILITY + 1;
    const float base = static_cast<float>(MAX_COMPUTED_PROBABILITY);
    const float maxEncoded = static_cast<float>(MAX_ENCODED_PROBABILITY);
    mTable.resize(entryCount);
    for (int encoded = 0; encoded < entryCount; ++encoded) {
        // Exponent grows linearly from 0 to 1 across the table.
        const float exponent = static_cast<float>(encoded) / maxEncoded;
        const int decoded = static_cast<int>(powf(base, exponent));
        // Clamp below at 0 first, then above at MAX_COMPUTED_PROBABILITY.
        const int lowerBounded = max(0, decoded);
        mTable[encoded] = min(MAX_COMPUTED_PROBABILITY, lowerBounded);
    }
}
} // namespace latinime } // namespace latinime

View File

@ -17,6 +17,8 @@
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H #ifndef LATINIME_FORGETTING_CURVE_UTILS_H
#define LATINIME_FORGETTING_CURVE_UTILS_H #define LATINIME_FORGETTING_CURVE_UTILS_H
#include <vector>
#include "defines.h" #include "defines.h"
namespace latinime { namespace latinime {
@ -44,16 +46,32 @@ class ForgettingCurveUtils {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
// Lookup table from an encoded probability (used as the index) to its probability value.
class ProbabilityTable {
 public:
    ProbabilityTable();

    // Returns the table entry stored at index encodedProbability, or NOT_A_PROBABILITY when
    // encodedProbability is not a valid index into the table.
    int getProbability(const int encodedProbability) const {
        // Valid indices are [0, size() - 1]. An index equal to size() must be rejected too;
        // otherwise mTable[size()] would read one element past the end of the vector.
        if (encodedProbability < 0 || encodedProbability >= static_cast<int>(mTable.size())) {
            return NOT_A_PROBABILITY;
        }
        return mTable[encodedProbability];
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);

    // Probability values indexed by encoded probability.
    std::vector<int> mTable;
};
static const int MAX_COMPUTED_PROBABILITY; static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_ENCODED_PROBABILITY; static const int MAX_ENCODED_PROBABILITY;
static const int MIN_VALID_ENCODED_PROBABILITY; static const int MIN_VALID_ENCODED_PROBABILITY;
static const int ENCODED_PROBABILITY_STEP; static const int ENCODED_PROBABILITY_STEP;
static const float MIN_PROBABILITY_TO_DECAY; static const float MIN_PROBABILITY_TO_DECAY;
static int decodeUnigramProbability(const int encodedProbability); static const ProbabilityTable sProbabilityTable;
static int decodeBigramProbability(const int encodedProbability); static int decodeProbability(const int encodedProbability);
static int backoff(const int unigramProbability); static int backoff(const int unigramProbability);
}; };