Merge "Change name of DecayingUtils."

This commit is contained in:
Keisuke Kuroyanagi 2013-10-02 07:54:52 +00:00 committed by Android (Google) Code Review
commit 52c20927a9
7 changed files with 49 additions and 46 deletions

View file

@ -85,8 +85,8 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/utils/, \ $(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \ buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \ byte_array_utils.cpp \
decaying_utils.cpp \
dict_file_writing_utils.cpp \ dict_file_writing_utils.cpp \
forgetting_curve_utils.cpp \
format_utils.cpp) \ format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \ suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
$(addprefix suggest/policyimpl/typing/, \ $(addprefix suggest/policyimpl/typing/, \

View file

@ -20,7 +20,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
} }
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) { if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
// This bigram is too weak to output. // This bigram is too weak to output.
*outBigramPos = NOT_A_DICT_POS; *outBigramPos = NOT_A_DICT_POS;
} else { } else {
@ -261,7 +261,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
bigramFlags); bigramFlags);
const int probabilityToWrite = mIsDecayingDict ? const int probabilityToWrite = mIsDecayingDict ?
DecayingUtils::getUpdatedBigramProbabilityDelta( ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
originalProbability, probability) : probability; originalProbability, probability) : probability;
const BigramListReadWriteUtils::BigramFlags updatedFlags = const BigramListReadWriteUtils::BigramFlags updatedFlags =
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
int *const writingPos) { int *const writingPos) {
// hasNext is false because we are adding a new bigram entry at the end of the bigram list. // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
const int probabilityToWrite = mIsDecayingDict ? const int probabilityToWrite = mIsDecayingDict ?
DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
probability; probability;
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
probabilityToWrite, false /* hasNext */, writingPos); probabilityToWrite, false /* hasNext */, writingPos);
@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay(
*outRemoved = false; *outRemoved = false;
if (mIsDecayingDict) { if (mIsDecayingDict) {
// Update bigram probability for decaying. // Update bigram probability for decaying.
const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave( const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
if (DecayingUtils::isValidBigram(newProbability)) { if (ForgettingCurveUtils::isValidBigram(newProbability)) {
// Write new probability. // Write new probability.
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
BigramListReadWriteUtils::setProbabilityInFlags( BigramListReadWriteUtils::setProbabilityInFlags(

View file

@ -16,7 +16,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime { namespace latinime {
@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners
bool isUselessPtNode = !node->isTerminal(); bool isUselessPtNode = !node->isTerminal();
if (node->isTerminal() && mIsDecayingDict) { if (node->isTerminal() && mIsDecayingDict) {
const int newProbability = const int newProbability =
DecayingUtils::getUnigramProbabilityToSave(node->getProbability()); ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
int writingPos = node->getProbabilityFieldPos(); int writingPos = node->getProbabilityFieldPos();
// Update probability. // Update probability.
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
mBuffer, newProbability, &writingPos)) { mBuffer, newProbability, &writingPos)) {
return false; return false;
} }
if (!DecayingUtils::isValidUnigram(newProbability)) { if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
isUselessPtNode = false; isUselessPtNode = false;
} }
} }

View file

@ -28,7 +28,7 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime { namespace latinime {
@ -154,7 +154,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability, int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const { const int bigramProbability) const {
if (mHeaderPolicy.isDecayingDict()) { if (mHeaderPolicy.isDecayingDict()) {
return DecayingUtils::getProbability(unigramProbability, bigramProbability); return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
} else { } else {
if (unigramProbability == NOT_A_PROBABILITY) { if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -344,10 +344,10 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size. // Needs to reduce dictionary size.
return true; return true;
} else if (mHeaderPolicy.isDecayingDict()) { } else if (mHeaderPolicy.isDecayingDict()) {
if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) { if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
// Unigram count exceeds the limit. // Unigram count exceeds the limit.
return true; return true;
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) { } else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
// Bigram count exceeds the limit. // Bigram count exceeds the limit.
return true; return true;
} else if (mindsBlockByGC && needsToDecay()) { } else if (mindsBlockByGC && needsToDecay()) {

View file

@ -25,8 +25,8 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "utils/hash_map_compat.h" #include "utils/hash_map_compat.h"
namespace latinime { namespace latinime {
@ -494,7 +494,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
return false; return false;
} }
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more unigrams. // TODO: Remove more unigrams.
} }
@ -507,7 +507,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
} }
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) { > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
// TODO: Remove more bigrams. // TODO: Remove more bigrams.
} }
@ -545,7 +545,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
const int newProbability) { const int newProbability) {
if (mNeedsToDecay) { if (mNeedsToDecay) {
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability); return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
newProbability);
} else { } else {
return newProbability; return newProbability;
} }

View file

@ -14,26 +14,26 @@
* limitations under the License. * limitations under the License.
*/ */
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime { namespace latinime {
const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
const int DecayingUtils::MAX_BIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120; const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8; const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability, /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbabilityDelta) { const int encodedBigramProbabilityDelta) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) { if (encodedUnigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -49,8 +49,8 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
} }
} }
/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability, /* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
const int newProbability) { const int originalEncodedProbability, const int newProbability) {
if (originalEncodedProbability == NOT_A_PROBABILITY) { if (originalEncodedProbability == NOT_A_PROBABILITY) {
// The unigram is not in this dictionary. // The unigram is not in this dictionary.
if (newProbability == NOT_A_PROBABILITY) { if (newProbability == NOT_A_PROBABILITY) {
@ -68,15 +68,16 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
} }
} }
/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) { /* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
} }
/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) { /* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
const int encodedProbabilityDelta) {
return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
} }
/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta( /* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
const int originalEncodedProbabilityDelta, const int newProbability) { const int originalEncodedProbabilityDelta, const int newProbability) {
if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
// The bigram relation is not in this dictionary. // The bigram relation is not in this dictionary.
@ -96,15 +97,15 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
} }
} }
/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) { /* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
} }
/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) { /* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
} }
/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) { /* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
if (probability < 0) { if (probability < 0) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -113,7 +114,8 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
} }
} }
/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) { /* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
const int encodedProbabilityDelta) {
const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
if (probabilityDelta < 0) { if (probabilityDelta < 0) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -122,7 +124,7 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
} }
} }
/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) { /* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
return rawProbability; return rawProbability;
} }

View file

@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef LATINIME_DECAYING_UTILS_H #ifndef LATINIME_FORGETTING_CURVE_UTILS_H
#define LATINIME_DECAYING_UTILS_H #define LATINIME_FORGETTING_CURVE_UTILS_H
#include "defines.h" #include "defines.h"
@ -25,7 +25,7 @@ namespace latinime {
// required to be introduced to each terminal PtNode and bigram entry. // required to be introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta. // TODO: Quit using bigram probability to indicate the delta.
// TODO: Quit using bigram probability delta. // TODO: Quit using bigram probability delta.
class DecayingUtils { class ForgettingCurveUtils {
public: public:
static const int MAX_UNIGRAM_COUNT; static const int MAX_UNIGRAM_COUNT;
static const int MAX_UNIGRAM_COUNT_AFTER_GC; static const int MAX_UNIGRAM_COUNT_AFTER_GC;
@ -50,7 +50,7 @@ class DecayingUtils {
static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
static const int MAX_COMPUTED_PROBABILITY; static const int MAX_COMPUTED_PROBABILITY;
static const int MAX_UNIGRAM_PROBABILITY; static const int MAX_UNIGRAM_PROBABILITY;
@ -67,4 +67,4 @@ class DecayingUtils {
static int getDecayedProbability(const int rawProbability); static int getDecayedProbability(const int rawProbability);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DECAYING_UTILS_H */ #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */