Move HistoricalInfo to property and use it in *Property.
Bug: 14425059 Change-Id: Icccccabad98fb543c6a6be2844cfc0086d80b739
This commit is contained in:
parent
c6a6f6a990
commit
287e155e44
21 changed files with 64 additions and 89 deletions
|
@ -373,7 +373,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
|
|||
}
|
||||
// Use 1 for count to indicate the word has been input.
|
||||
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
||||
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
||||
isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
|
||||
&shortcuts);
|
||||
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
|
||||
&unigramProperty);
|
||||
}
|
||||
|
@ -405,7 +406,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
|||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
// Use 1 for count to indicate the ngram has been input.
|
||||
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||
probability, timestamp, 0 /* level */, 1 /* count */);
|
||||
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||
return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
||||
}
|
||||
|
||||
|
@ -494,8 +495,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
}
|
||||
// Use 1 for count to indicate the word has been input.
|
||||
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
||||
&shortcuts);
|
||||
isBlacklisted, unigramProbability,
|
||||
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
|
||||
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
|
||||
&unigramProperty);
|
||||
if (word0) {
|
||||
|
@ -503,7 +504,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
// Use 1 for count to indicate the bigram has been input.
|
||||
const NgramProperty ngramProperty(
|
||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||
bigramProbability, timestamp, 0 /* level */, 1 /* count */);
|
||||
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
|
||||
false /* isBeginningOfSentence */);
|
||||
dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
||||
|
|
|
@ -34,7 +34,7 @@ class HistoricalInfo {
|
|||
return mTimestamp != NOT_A_TIMESTAMP;
|
||||
}
|
||||
|
||||
int getTimeStamp() const {
|
||||
int getTimestamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
|
@ -47,12 +47,12 @@ class HistoricalInfo {
|
|||
}
|
||||
|
||||
private:
|
||||
// Copy constructor is public to use this class as a type of return value.
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
|
||||
// The default copy constructor and assignment operator are used so that this class can be stored in std::vector.
|
||||
|
||||
const int mTimestamp;
|
||||
const int mLevel;
|
||||
const int mCount;
|
||||
// TODO: Make members const.
|
||||
int mTimestamp;
|
||||
int mLevel;
|
||||
int mCount;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_HISTORICAL_INFO_H */
|
|
@ -20,15 +20,16 @@
|
|||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class NgramProperty {
|
||||
public:
|
||||
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
|
||||
const int timestamp, const int level, const int count)
|
||||
const HistoricalInfo &historicalInfo)
|
||||
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
|
||||
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
||||
mHistoricalInfo(historicalInfo) {}
|
||||
|
||||
const std::vector<int> *getTargetCodePoints() const {
|
||||
return &mTargetCodePoints;
|
||||
|
@ -38,16 +39,8 @@ class NgramProperty {
|
|||
return mProbability;
|
||||
}
|
||||
|
||||
int getTimestamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
int getLevel() const {
|
||||
return mLevel;
|
||||
}
|
||||
|
||||
int getCount() const {
|
||||
return mCount;
|
||||
const HistoricalInfo getHistoricalInfo() const {
|
||||
return mHistoricalInfo;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -57,9 +50,7 @@ class NgramProperty {
|
|||
// TODO: Make members const.
|
||||
std::vector<int> mTargetCodePoints;
|
||||
int mProbability;
|
||||
int mTimestamp;
|
||||
int mLevel;
|
||||
int mCount;
|
||||
HistoricalInfo mHistoricalInfo;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_NGRAM_PROPERTY_H
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -50,15 +51,14 @@ class UnigramProperty {
|
|||
|
||||
UnigramProperty()
|
||||
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
|
||||
mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
|
||||
mShortcuts() {}
|
||||
mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
|
||||
|
||||
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
|
||||
const bool isBlacklisted, const int probability, const int timestamp, const int level,
|
||||
const int count, const std::vector<ShortcutProperty> *const shortcuts)
|
||||
const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
|
||||
const std::vector<ShortcutProperty> *const shortcuts)
|
||||
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
|
||||
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
||||
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
|
||||
mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}
|
||||
|
||||
bool representsBeginningOfSentence() const {
|
||||
return mRepresentsBeginningOfSentence;
|
||||
|
@ -85,16 +85,8 @@ class UnigramProperty {
|
|||
return mProbability;
|
||||
}
|
||||
|
||||
int getTimestamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
int getLevel() const {
|
||||
return mLevel;
|
||||
}
|
||||
|
||||
int getCount() const {
|
||||
return mCount;
|
||||
const HistoricalInfo getHistoricalInfo() const {
|
||||
return mHistoricalInfo;
|
||||
}
|
||||
|
||||
const std::vector<ShortcutProperty> &getShortcuts() const {
|
||||
|
@ -110,10 +102,7 @@ class UnigramProperty {
|
|||
bool mIsNotAWord;
|
||||
bool mIsBlacklisted;
|
||||
int mProbability;
|
||||
// Historical information
|
||||
int mTimestamp;
|
||||
int mLevel;
|
||||
int mCount;
|
||||
HistoricalInfo mHistoricalInfo;
|
||||
std::vector<ShortcutProperty> mShortcuts;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
|
||||
#include "utils/jni_data_utils.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -31,8 +32,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
|||
!mNgrams.empty(), mUnigramProperty.hasShortcuts(),
|
||||
mUnigramProperty.representsBeginningOfSentence()};
|
||||
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
|
||||
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
|
||||
mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
|
||||
const HistoricalInfo &historicalInfo = mUnigramProperty.getHistoricalInfo();
|
||||
int probabilityInfo[] = {mUnigramProperty.getProbability(), historicalInfo.getTimestamp(),
|
||||
historicalInfo.getLevel(), historicalInfo.getCount()};
|
||||
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
|
||||
probabilityInfo);
|
||||
|
||||
|
@ -51,10 +53,10 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
|||
false /* needsNullTermination */);
|
||||
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
||||
env->DeleteLocalRef(bigramWord1CodePointArray);
|
||||
|
||||
const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
|
||||
int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
|
||||
ngramProperty.getTimestamp(), ngramProperty.getLevel(),
|
||||
ngramProperty.getCount()};
|
||||
ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
|
||||
ngramHistoricalInfo.getCount()};
|
||||
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
||||
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
||||
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
||||
|
|
|
@ -267,8 +267,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
|||
const NgramProperty *const ngramProperty) const {
|
||||
// TODO: Consolidate historical info and probability.
|
||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||
const HistoricalInfo historicalInfoForUpdate(ngramProperty->getTimestamp(),
|
||||
ngramProperty->getLevel(), ngramProperty->getCount());
|
||||
const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
|
||||
|
|
|
@ -83,10 +83,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
|||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
||||
historicalInfo->getTimeStamp());
|
||||
historicalInfo->getTimestamp());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
||||
|
|
|
@ -25,8 +25,8 @@
|
|||
#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
|
|
|
@ -147,7 +147,7 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit
|
|||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
|
|
|
@ -25,8 +25,8 @@
|
|||
#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
|
|
|
@ -396,8 +396,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
|||
const UnigramProperty *const unigramProperty) const {
|
||||
// TODO: Consolidate historical info and probability.
|
||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||
const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
|
||||
unigramProperty->getLevel(), unigramProperty->getCount());
|
||||
const HistoricalInfo &historicalInfoForUpdate = unigramProperty->getHistoricalInfo();
|
||||
const HistoricalInfo updatedHistoricalInfo =
|
||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
originalProbabilityEntry->getHistoricalInfo(),
|
||||
|
|
|
@ -343,7 +343,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
HistoricalInfo(), &shortcuts);
|
||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||
&beginningOfSentenceUnigramProperty)) {
|
||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||
|
@ -528,8 +528,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
bigramEntry.getProbability();
|
||||
ngrams.emplace_back(
|
||||
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount());
|
||||
probability, *historicalInfo);
|
||||
}
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
|
@ -552,8 +551,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
}
|
||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount(), &shortcuts);
|
||||
*historicalInfo, &shortcuts);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||
}
|
||||
|
||||
|
|
|
@ -216,7 +216,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
|||
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
probabilityEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
probabilityEntry.getHistoricalInfo()->getTimestamp()));
|
||||
}
|
||||
|
||||
// Delete unigrams.
|
||||
|
@ -263,7 +263,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
|||
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
bigramEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(entryPos, probability,
|
||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
bigramEntry.getHistoricalInfo()->getTimestamp()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -452,7 +452,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
|||
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
|
||||
ngrams.emplace_back(
|
||||
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
|
||||
probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
|
||||
probability, HistoricalInfo());
|
||||
}
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
|
@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
|||
}
|
||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
HistoricalInfo(), &shortcuts);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||
}
|
||||
|
||||
|
|
|
@ -296,7 +296,7 @@ bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPoli
|
|||
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
|
||||
headerPolicy) : probabilityEntry.getProbability();
|
||||
outEntryInfo->emplace_back(probability,
|
||||
probabilityEntry.getHistoricalInfo()->getTimeStamp(),
|
||||
probabilityEntry.getHistoricalInfo()->getTimestamp(),
|
||||
entry.key(), targetLevel, prevWordIds->data());
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -21,10 +21,10 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
#include "suggest/core/dictionary/property/ngram_property.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -53,15 +53,13 @@ class ProbabilityEntry {
|
|||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||
unigramProperty->isPossiblyOffensive())),
|
||||
mProbability(unigramProperty->getProbability()),
|
||||
mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
|
||||
unigramProperty->getCount()) {}
|
||||
mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
|
||||
|
||||
// Create from ngram property.
|
||||
// TODO: Set flags.
|
||||
ProbabilityEntry(const NgramProperty *const ngramProperty)
|
||||
: mFlags(0), mProbability(ngramProperty->getProbability()),
|
||||
mHistoricalInfo(ngramProperty->getTimestamp(), ngramProperty->getLevel(),
|
||||
ngramProperty->getCount()) {}
|
||||
mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
|
||||
|
||||
bool isValid() const {
|
||||
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
|
||||
|
@ -103,7 +101,7 @@ class ProbabilityEntry {
|
|||
uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
|
||||
if (hasHistoricalInfo) {
|
||||
encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
|
||||
^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
|
||||
^ static_cast<uint64_t>(mHistoricalInfo.getTimestamp());
|
||||
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
|
||||
^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
|
||||
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
|
||||
|
|
|
@ -302,7 +302,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
|||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
||||
HistoricalInfo(), &shortcuts);
|
||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||
&beginningOfSentenceUnigramProperty)) {
|
||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||
|
@ -464,8 +464,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
||||
probabilityEntry.getProbability();
|
||||
ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
||||
historicalInfo->getCount());
|
||||
probability, *historicalInfo);
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||
|
@ -487,8 +486,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
}
|
||||
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
|
||||
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
|
||||
probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
|
||||
historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
|
||||
probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
|
||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||
const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
|
||||
const int timestamp = newHistoricalInfo->getTimeStamp();
|
||||
const int timestamp = newHistoricalInfo->getTimestamp();
|
||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||
// Add entry as a valid word.
|
||||
const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
|
||||
|
@ -78,7 +78,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
|
||||
/* static */ int ForgettingCurveUtils::decodeProbability(
|
||||
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
|
||||
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
|
||||
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(),
|
||||
headerPolicy->getForgettingCurveDurationToLevelDown());
|
||||
return sProbabilityTable.getProbability(
|
||||
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
|
||||
|
@ -102,7 +102,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
|
||||
const HeaderPolicy *const headerPolicy) {
|
||||
return historicalInfo->getLevel() > 0
|
||||
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
|
||||
|| getElapsedTimeStepCount(historicalInfo->getTimestamp(),
|
||||
headerPolicy->getForgettingCurveDurationToLevelDown())
|
||||
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||
}
|
||||
|
@ -110,12 +110,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
const HistoricalInfo *const originalHistoricalInfo,
|
||||
const HeaderPolicy *const headerPolicy) {
|
||||
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||
if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) {
|
||||
return HistoricalInfo();
|
||||
}
|
||||
const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
|
||||
const int elapsedTimeStep = getElapsedTimeStepCount(
|
||||
originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds);
|
||||
originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds);
|
||||
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
|
||||
// No need to update historical info.
|
||||
return *originalHistoricalInfo;
|
||||
|
@ -124,7 +124,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
||||
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
|
||||
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
|
||||
const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() +
|
||||
const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimestamp() +
|
||||
levelDownAmount * durationToLevelDownInSeconds;
|
||||
return HistoricalInfo(adjustedTimestampInSeconds,
|
||||
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
|
|||
languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
|
||||
const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId);
|
||||
EXPECT_EQ(flag, entry.getFlags());
|
||||
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
|
||||
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimestamp());
|
||||
EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
|
||||
EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
|
|||
ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
|
||||
|
||||
EXPECT_EQ(flag, decodedEntry.getFlags());
|
||||
EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimeStamp());
|
||||
EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimestamp());
|
||||
EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
|
||||
EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue