Merge "Use probability table for decaying dictionaries."
commit
ba9dc0860f
|
@ -14,6 +14,7 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
|
@ -35,15 +36,17 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
|
||||||
// duration of the decay is approximately 66hours.
|
// duration of the decay is approximately 66hours.
|
||||||
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
||||||
|
|
||||||
|
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability) {
|
const int encodedBigramProbability) {
|
||||||
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
} else if (encodedBigramProbability == NOT_A_PROBABILITY) {
|
} else if (encodedBigramProbability == NOT_A_PROBABILITY) {
|
||||||
return backoff(decodeUnigramProbability(encodedUnigramProbability));
|
return backoff(decodeProbability(encodedUnigramProbability));
|
||||||
} else {
|
} else {
|
||||||
const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
|
const int unigramProbability = decodeProbability(encodedUnigramProbability);
|
||||||
const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
|
const int bigramProbability = decodeProbability(encodedBigramProbability);
|
||||||
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
|
return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,21 +91,12 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
|
/* static */ int ForgettingCurveUtils::decodeProbability(const int encodedProbability) {
|
||||||
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
|
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
|
||||||
if (probability < 0) {
|
if (encodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
} else {
|
} else {
|
||||||
return min(probability, MAX_ENCODED_PROBABILITY) * 8;
|
return min(sProbabilityTable.getProbability(encodedProbability), MAX_ENCODED_PROBABILITY);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
|
|
||||||
const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
|
|
||||||
if (probability < 0) {
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
} else {
|
|
||||||
return min(probability, MAX_ENCODED_PROBABILITY) * 8;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,4 +109,16 @@ const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the lookup table indexed by encoded probability. Entry i is
// MAX_COMPUTED_PROBABILITY raised to the power (i / MAX_ENCODED_PROBABILITY), truncated to an
// integer and clamped to the range [0, MAX_COMPUTED_PROBABILITY].
ForgettingCurveUtils::ProbabilityTable::ProbabilityTable() : mTable() {
    // Table entry is as follows:
    // 1, 1, 1, 2, 3, 5, 6, 9, 13, 18, 25, 34, 48, 66, 91, 127.
    // Note that the first MIN_VALID_ENCODED_PROBABILITY values are not used.
    const float base = static_cast<float>(MAX_COMPUTED_PROBABILITY);
    const float maxEncoded = static_cast<float>(MAX_ENCODED_PROBABILITY);
    mTable.resize(MAX_ENCODED_PROBABILITY + 1);
    int encoded = 0;
    while (encoded <= MAX_ENCODED_PROBABILITY) {
        const float exponent = static_cast<float>(encoded) / maxEncoded;
        // The cast truncates toward zero, which is what produces the integer steps above.
        const int truncated = static_cast<int>(powf(base, exponent));
        const int nonNegative = max(0, truncated);
        mTable[encoded] = min(MAX_COMPUTED_PROBABILITY, nonNegative);
        ++encoded;
    }
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
|
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
|
||||||
#define LATINIME_FORGETTING_CURVE_UTILS_H
|
#define LATINIME_FORGETTING_CURVE_UTILS_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -44,16 +46,32 @@ class ForgettingCurveUtils {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
||||||
|
|
||||||
|
class ProbabilityTable {
|
||||||
|
public:
|
||||||
|
ProbabilityTable();
|
||||||
|
|
||||||
|
int getProbability(const int encodedProbability) const {
|
||||||
|
if (encodedProbability < 0 || encodedProbability > static_cast<int>(mTable.size())) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
return mTable[encodedProbability];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(ProbabilityTable);
|
||||||
|
|
||||||
|
std::vector<int> mTable;
|
||||||
|
};
|
||||||
|
|
||||||
static const int MAX_COMPUTED_PROBABILITY;
|
static const int MAX_COMPUTED_PROBABILITY;
|
||||||
static const int MAX_ENCODED_PROBABILITY;
|
static const int MAX_ENCODED_PROBABILITY;
|
||||||
static const int MIN_VALID_ENCODED_PROBABILITY;
|
static const int MIN_VALID_ENCODED_PROBABILITY;
|
||||||
static const int ENCODED_PROBABILITY_STEP;
|
static const int ENCODED_PROBABILITY_STEP;
|
||||||
|
|
||||||
static const float MIN_PROBABILITY_TO_DECAY;
|
static const float MIN_PROBABILITY_TO_DECAY;
|
||||||
|
|
||||||
static int decodeUnigramProbability(const int encodedProbability);
|
static const ProbabilityTable sProbabilityTable;
|
||||||
|
|
||||||
static int decodeBigramProbability(const int encodedProbability);
|
static int decodeProbability(const int encodedProbability);
|
||||||
|
|
||||||
static int backoff(const int unigramProbability);
|
static int backoff(const int unigramProbability);
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue