Merge "Change name of DecayingUtils."
This commit is contained in:
commit
52c20927a9
7 changed files with 49 additions and 46 deletions
|
@ -85,8 +85,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||
buffer_with_extendable_buffer.cpp \
|
||||
byte_array_utils.cpp \
|
||||
decaying_utils.cpp \
|
||||
dict_file_writing_utils.cpp \
|
||||
forgetting_curve_utils.cpp \
|
||||
format_utils.cpp) \
|
||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||
$(addprefix suggest/policyimpl/typing/, \
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
|
|||
}
|
||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
||||
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
||||
if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
|
||||
if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
|
||||
// This bigram is too weak to output.
|
||||
*outBigramPos = NOT_A_DICT_POS;
|
||||
} else {
|
||||
|
@ -261,7 +261,7 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
|
|||
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
|
||||
bigramFlags);
|
||||
const int probabilityToWrite = mIsDecayingDict ?
|
||||
DecayingUtils::getUpdatedBigramProbabilityDelta(
|
||||
ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
|
||||
originalProbability, probability) : probability;
|
||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
|
||||
|
@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
|
|||
int *const writingPos) {
|
||||
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
||||
const int probabilityToWrite = mIsDecayingDict ?
|
||||
DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
|
||||
ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
|
||||
probability;
|
||||
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
||||
probabilityToWrite, false /* hasNext */, writingPos);
|
||||
|
@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay(
|
|||
*outRemoved = false;
|
||||
if (mIsDecayingDict) {
|
||||
// Update bigram probability for decaying.
|
||||
const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave(
|
||||
const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
|
||||
BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
|
||||
if (DecayingUtils::isValidBigram(newProbability)) {
|
||||
if (ForgettingCurveUtils::isValidBigram(newProbability)) {
|
||||
// Write new probability.
|
||||
const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
|
||||
BigramListReadWriteUtils::setProbabilityInFlags(
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners
|
|||
bool isUselessPtNode = !node->isTerminal();
|
||||
if (node->isTerminal() && mIsDecayingDict) {
|
||||
const int newProbability =
|
||||
DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
|
||||
ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
|
||||
int writingPos = node->getProbabilityFieldPos();
|
||||
// Update probability.
|
||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
|
||||
mBuffer, newProbability, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (!DecayingUtils::isValidUnigram(newProbability)) {
|
||||
if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
|
||||
isUselessPtNode = false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -154,7 +154,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
|||
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||
const int bigramProbability) const {
|
||||
if (mHeaderPolicy.isDecayingDict()) {
|
||||
return DecayingUtils::getProbability(unigramProbability, bigramProbability);
|
||||
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
||||
} else {
|
||||
if (unigramProbability == NOT_A_PROBABILITY) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -344,10 +344,10 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
|||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy.isDecayingDict()) {
|
||||
if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) {
|
||||
if (mUnigramCount >= ForgettingCurveUtils::MAX_UNIGRAM_COUNT) {
|
||||
// Unigram count exceeds the limit.
|
||||
return true;
|
||||
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
|
||||
} else if (mBigramCount >= ForgettingCurveUtils::MAX_BIGRAM_COUNT) {
|
||||
// Bigram count exceeds the limit.
|
||||
return true;
|
||||
} else if (mindsBlockByGC && needsToDecay()) {
|
||||
|
|
|
@ -25,8 +25,8 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -494,7 +494,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
return false;
|
||||
}
|
||||
if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
||||
.getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
||||
// TODO: Remove more unigrams.
|
||||
}
|
||||
|
||||
|
@ -507,7 +507,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
}
|
||||
|
||||
if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
||||
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||
> ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||
// TODO: Remove more bigrams.
|
||||
}
|
||||
|
||||
|
@ -545,7 +545,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
|||
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) {
|
||||
if (mNeedsToDecay) {
|
||||
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
|
||||
return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
|
||||
newProbability);
|
||||
} else {
|
||||
return newProbability;
|
||||
}
|
||||
|
|
|
@ -14,26 +14,26 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
|
||||
const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
|
||||
const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
|
||||
const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
|
||||
const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT = 12000;
|
||||
const int ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
|
||||
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
|
||||
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
|
||||
|
||||
const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
|
||||
const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
|
||||
const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
|
||||
const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
|
||||
const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
|
||||
const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
|
||||
const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
||||
const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
|
||||
const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
|
||||
const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
|
||||
const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
|
||||
const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
|
||||
const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
|
||||
const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
||||
|
||||
/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
|
||||
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
||||
const int encodedBigramProbabilityDelta) {
|
||||
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -49,8 +49,8 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
|
||||
const int newProbability) {
|
||||
/* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
|
||||
const int originalEncodedProbability, const int newProbability) {
|
||||
if (originalEncodedProbability == NOT_A_PROBABILITY) {
|
||||
// The unigram is not in this dictionary.
|
||||
if (newProbability == NOT_A_PROBABILITY) {
|
||||
|
@ -68,15 +68,16 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
|
||||
/* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
|
||||
return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
|
||||
/* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
|
||||
const int encodedProbabilityDelta) {
|
||||
return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
|
||||
/* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
|
||||
const int originalEncodedProbabilityDelta, const int newProbability) {
|
||||
if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
|
||||
// The bigram relation is not in this dictionary.
|
||||
|
@ -96,15 +97,15 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
|
||||
/* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
|
||||
return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
|
||||
/* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
|
||||
return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
|
||||
/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
|
||||
const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
|
||||
if (probability < 0) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -113,7 +114,8 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
|
||||
/* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
|
||||
const int encodedProbabilityDelta) {
|
||||
const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||
if (probabilityDelta < 0) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -122,7 +124,7 @@ const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
|||
}
|
||||
}
|
||||
|
||||
/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
|
||||
/* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
|
||||
return rawProbability;
|
||||
}
|
||||
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DECAYING_UTILS_H
|
||||
#define LATINIME_DECAYING_UTILS_H
|
||||
#ifndef LATINIME_FORGETTING_CURVE_UTILS_H
|
||||
#define LATINIME_FORGETTING_CURVE_UTILS_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
|
@ -25,7 +25,7 @@ namespace latinime {
|
|||
// required to be introduced to each terminal PtNode and bigram entry.
|
||||
// TODO: Quit using bigram probability to indicate the delta.
|
||||
// TODO: Quit using bigram probability delta.
|
||||
class DecayingUtils {
|
||||
class ForgettingCurveUtils {
|
||||
public:
|
||||
static const int MAX_UNIGRAM_COUNT;
|
||||
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
|
||||
|
@ -50,7 +50,7 @@ class DecayingUtils {
|
|||
static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils);
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);
|
||||
|
||||
static const int MAX_COMPUTED_PROBABILITY;
|
||||
static const int MAX_UNIGRAM_PROBABILITY;
|
||||
|
@ -67,4 +67,4 @@ class DecayingUtils {
|
|||
static int getDecayedProbability(const int rawProbability);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DECAYING_UTILS_H */
|
||||
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
|
Loading…
Reference in a new issue