Move HistoricalInfo to property and use it in *Property.

Bug: 14425059
Change-Id: Icccccabad98fb543c6a6be2844cfc0086d80b739
Keisuke Kuroyanagi 2014-10-01 11:39:33 +09:00
parent c6a6f6a990
commit 287e155e44
21 changed files with 64 additions and 89 deletions
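
For orientation, the shape of the change is sketched below; this is a summary pieced together from the hunks that follow, not the verbatim declarations. Timestamp, level, and count stop being three loose int parameters and travel together as one HistoricalInfo value.

// Before: historical fields passed as separate ints.
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
        const bool isBlacklisted, const int probability, const int timestamp,
        const int level, const int count,
        const std::vector<ShortcutProperty> *const shortcuts);

// After: the historical fields are bundled in a copyable HistoricalInfo.
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
        const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
        const std::vector<ShortcutProperty> *const shortcuts);

// NgramProperty changes the same way, and both classes replace
// getTimestamp()/getLevel()/getCount() with a single getHistoricalInfo().

The same commit also normalizes the accessor name from getTimeStamp() to getTimestamp() across the dictionary code.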

View file

@@ -373,7 +373,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
     }
     // Use 1 for count to indicate the word has inputted.
     const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
-            isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
+            isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
+            &shortcuts);
     return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
             &unigramProperty);
 }
@@ -405,7 +406,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
     // Use 1 for count to indicate the ngram has inputted.
     const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
-            probability, timestamp, 0 /* level */, 1 /* count */);
+            probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
     return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
 }
@@ -494,8 +495,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
         }
         // Use 1 for count to indicate the word has inputted.
         const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
-                isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
-                &shortcuts);
+                isBlacklisted, unigramProbability,
+                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
         dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
                 &unigramProperty);
         if (word0) {
@@ -503,7 +504,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
             // Use 1 for count to indicate the bigram has inputted.
             const NgramProperty ngramProperty(
                     CodePointArrayView(word1CodePoints, word1Length).toVector(),
-                    bigramProbability, timestamp, 0 /* level */, 1 /* count */);
+                    bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
             const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
                     false /* isBeginningOfSentence */);
             dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);

View file

@@ -34,7 +34,7 @@ class HistoricalInfo {
         return mTimestamp != NOT_A_TIMESTAMP;
     }

-    int getTimeStamp() const {
+    int getTimestamp() const {
         return mTimestamp;
     }
@@ -47,12 +47,12 @@ class HistoricalInfo {
     }

 private:
-    // Copy constructor is public to use this class as a type of return value.
-    DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
-
-    const int mTimestamp;
-    const int mLevel;
-    const int mCount;
+    // Default copy constructor and assign operator are used for using in std::vector.
+    // TODO: Make members const.
+    int mTimestamp;
+    int mLevel;
+    int mCount;
 };
 } // namespace latinime
 #endif /* LATINIME_HISTORICAL_INFO_H */
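
Pieced together from the hunks above and the call sites later in this commit, the class after the move looks roughly as follows. This is a reconstruction for reference, not the verbatim header; the license banner and include guard are omitted, NOT_A_TIMESTAMP comes from defines.h, and the names of the default constructor's values and the validity check are assumed from context.

namespace latinime {
class HistoricalInfo {
 public:
    // Invalid historical information (assumed default: NOT_A_TIMESTAMP, level 0, count 0,
    // consistent with how HistoricalInfo() replaces those literals at call sites below).
    HistoricalInfo() : mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0) {}

    HistoricalInfo(const int timestamp, const int level, const int count)
            : mTimestamp(timestamp), mLevel(level), mCount(count) {}

    // Name assumed; the body appears at the top of the first hunk above.
    bool isValid() const {
        return mTimestamp != NOT_A_TIMESTAMP;
    }

    int getTimestamp() const {
        return mTimestamp;
    }

    int getLevel() const {
        return mLevel;
    }

    int getCount() const {
        return mCount;
    }

 private:
    // Default copy constructor and assign operator are used for using in std::vector.
    // TODO: Make members const.
    int mTimestamp;
    int mLevel;
    int mCount;
};
} // namespace latinime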

View file

@@ -20,15 +20,16 @@
 #include <vector>

 #include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"

 namespace latinime {

 class NgramProperty {
  public:
     NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
-            const int timestamp, const int level, const int count)
+            const HistoricalInfo &historicalInfo)
             : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
-              mTimestamp(timestamp), mLevel(level), mCount(count) {}
+              mHistoricalInfo(historicalInfo) {}

     const std::vector<int> *getTargetCodePoints() const {
         return &mTargetCodePoints;
@@ -38,16 +39,8 @@ class NgramProperty {
         return mProbability;
     }

-    int getTimestamp() const {
-        return mTimestamp;
-    }
-
-    int getLevel() const {
-        return mLevel;
-    }
-
-    int getCount() const {
-        return mCount;
+    const HistoricalInfo getHistoricalInfo() const {
+        return mHistoricalInfo;
     }

  private:
@@ -57,9 +50,7 @@ class NgramProperty {
     // TODO: Make members const.
     std::vector<int> mTargetCodePoints;
     int mProbability;
-    int mTimestamp;
-    int mLevel;
-    int mCount;
+    HistoricalInfo mHistoricalInfo;
 };
 } // namespace latinime
 #endif // LATINIME_NGRAM_PROPERTY_H

View file

@@ -20,6 +20,7 @@
 #include <vector>

 #include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"

 namespace latinime {
@@ -50,15 +51,14 @@ class UnigramProperty {
     UnigramProperty()
             : mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
-              mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
-              mShortcuts() {}
+              mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}

     UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
-            const bool isBlacklisted, const int probability, const int timestamp, const int level,
-            const int count, const std::vector<ShortcutProperty> *const shortcuts)
+            const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
+            const std::vector<ShortcutProperty> *const shortcuts)
             : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
               mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
-              mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
+              mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}

     bool representsBeginningOfSentence() const {
         return mRepresentsBeginningOfSentence;
@@ -85,16 +85,8 @@ class UnigramProperty {
         return mProbability;
     }

-    int getTimestamp() const {
-        return mTimestamp;
-    }
-
-    int getLevel() const {
-        return mLevel;
-    }
-
-    int getCount() const {
-        return mCount;
+    const HistoricalInfo getHistoricalInfo() const {
+        return mHistoricalInfo;
     }

     const std::vector<ShortcutProperty> &getShortcuts() const {
@@ -110,10 +102,7 @@ class UnigramProperty {
     bool mIsNotAWord;
     bool mIsBlacklisted;
     int mProbability;
-    // Historical information
-    int mTimestamp;
-    int mLevel;
-    int mCount;
+    HistoricalInfo mHistoricalInfo;
     std::vector<ShortcutProperty> mShortcuts;
 };
 } // namespace latinime
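
A small usage sketch under the new signatures; it assumes only the two headers changed above plus historical_info.h, and the literal values are invented for illustration:

#include <vector>

#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/dictionary/property/ngram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"

namespace latinime {

void buildPropertiesExample() {
    // One HistoricalInfo value carries timestamp, level, and count together.
    const HistoricalInfo historicalInfo(1412124000 /* timestamp */, 0 /* level */,
            1 /* count */);

    // Unigram: same leading flags and probability as before, then the bundled
    // historical info, then the (here empty) shortcut list.
    const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    const UnigramProperty unigramProperty(false /* isBeginningOfSentence */,
            false /* isNotAWord */, false /* isBlacklisted */, 100 /* probability */,
            historicalInfo, &shortcuts);

    // Ngram: target code points, probability, and the same bundled historical info.
    const NgramProperty ngramProperty(std::vector<int>({0x77, 0x6F, 0x72, 0x64}) /* "word" */,
            80 /* probability */, historicalInfo);

    // Callers that used to call getTimestamp()/getLevel()/getCount() on the property
    // now read them from the returned HistoricalInfo.
    const int count = unigramProperty.getHistoricalInfo().getCount();
    (void) count;
    (void) ngramProperty;
}

} // namespace latinime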

View file

@@ -17,6 +17,7 @@
 #include "suggest/core/dictionary/property/word_property.h"

 #include "utils/jni_data_utils.h"
+#include "suggest/core/dictionary/property/historical_info.h"

 namespace latinime {
@@ -31,8 +32,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
             !mNgrams.empty(), mUnigramProperty.hasShortcuts(),
             mUnigramProperty.representsBeginningOfSentence()};
     env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
-    int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
-            mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
+    const HistoricalInfo &historicalInfo = mUnigramProperty.getHistoricalInfo();
+    int probabilityInfo[] = {mUnigramProperty.getProbability(), historicalInfo.getTimestamp(),
+            historicalInfo.getLevel(), historicalInfo.getCount()};
     env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
             probabilityInfo);
@@ -51,10 +53,10 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
                 false /* needsNullTermination */);
         env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
         env->DeleteLocalRef(bigramWord1CodePointArray);
+        const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
         int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
-                ngramProperty.getTimestamp(), ngramProperty.getLevel(),
-                ngramProperty.getCount()};
+                ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
+                ngramHistoricalInfo.getCount()};
         jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
         env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
                 NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);

View file

@@ -267,8 +267,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
         const NgramProperty *const ngramProperty) const {
     // TODO: Consolidate historical info and probability.
     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
-        const HistoricalInfo historicalInfoForUpdate(ngramProperty->getTimestamp(),
-                ngramProperty->getLevel(), ngramProperty->getCount());
+        const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
         const HistoricalInfo updatedHistoricalInfo =
                 ForgettingCurveUtils::createUpdatedHistoricalInfo(
                         originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
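
One C++ detail behind this simplification: getHistoricalInfo() returns by value (see the headers above), so historicalInfoForUpdate binds a const reference to a temporary. That is well defined; the temporary lives as long as the reference does. A minimal standalone illustration, independent of the dictionary code:

#include <cassert>

struct Info {
    int value;
};

// Returns a copy, the way getHistoricalInfo() does in this change.
Info makeInfo() {
    return Info{42};
}

int main() {
    // Binding the returned temporary to a const reference extends its lifetime
    // to the lifetime of the reference, so the later read is valid.
    const Info &info = makeInfo();
    assert(info.value == 42);
    return 0;
}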

View file

@@ -83,10 +83,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
     }
     if (mHasHistoricalInfo) {
         const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
-        if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+        if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
                 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
             AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
-                    historicalInfo->getTimeStamp());
+                    historicalInfo->getTimestamp());
             return false;
         }
         if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),

View file

@@ -25,8 +25,8 @@
 #define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H

 #include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"

 namespace latinime {
 namespace backward {

View file

@@ -147,7 +147,7 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit
     }
     if (mHasHistoricalInfo) {
         const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
-        if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
+        if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
                 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
             AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
             return false;

View file

@@ -25,8 +25,8 @@
 #define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H

 #include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"

 namespace latinime {
 namespace backward {

View file

@@ -396,8 +396,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
         const UnigramProperty *const unigramProperty) const {
     // TODO: Consolidate historical info and probability.
     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
-        const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
-                unigramProperty->getLevel(), unigramProperty->getCount());
+        const HistoricalInfo &historicalInfoForUpdate = unigramProperty->getHistoricalInfo();
         const HistoricalInfo updatedHistoricalInfo =
                 ForgettingCurveUtils::createUpdatedHistoricalInfo(
                         originalProbabilityEntry->getHistoricalInfo(),

View file

@@ -343,7 +343,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
         const UnigramProperty beginningOfSentenceUnigramProperty(
                 true /* representsBeginningOfSentence */, true /* isNotAWord */,
                 false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+                HistoricalInfo(), &shortcuts);
         if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                 &beginningOfSentenceUnigramProperty)) {
             AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@@ -528,8 +528,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
                             bigramEntry.getProbability();
             ngrams.emplace_back(
                     CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
-                    probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
-                    historicalInfo->getCount());
+                    probability, *historicalInfo);
         }
     }
     // Fetch shortcut information.
@@ -552,8 +551,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
-            historicalInfo->getCount(), &shortcuts);
+            *historicalInfo, &shortcuts);
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

View file

@@ -216,7 +216,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
                         probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
                 probabilityEntry.getProbability();
         priorityQueue.push(DictProbability(terminalPos, probability,
-                probabilityEntry.getHistoricalInfo()->getTimeStamp()));
+                probabilityEntry.getHistoricalInfo()->getTimestamp()));
     }

     // Delete unigrams.
@@ -263,7 +263,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
                            bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
                    bigramEntry.getProbability();
            priorityQueue.push(DictProbability(entryPos, probability,
-                    bigramEntry.getHistoricalInfo()->getTimeStamp()));
+                    bigramEntry.getHistoricalInfo()->getTimestamp()));
        }
    }

View file

@@ -452,7 +452,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
             const int probability = getProbability(word1Probability, bigramsIt.getProbability());
             ngrams.emplace_back(
                     CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
-                    probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
+                    probability, HistoricalInfo());
         }
     }
     // Fetch shortcut information.
@@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+            HistoricalInfo(), &shortcuts);
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

View file

@@ -296,7 +296,7 @@ bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPoli
                 ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
                         headerPolicy) : probabilityEntry.getProbability();
         outEntryInfo->emplace_back(probability,
-                probabilityEntry.getHistoricalInfo()->getTimeStamp(),
+                probabilityEntry.getHistoricalInfo()->getTimestamp(),
                 entry.key(), targetLevel, prevWordIds->data());
     }
     return true;

View file

@@ -21,10 +21,10 @@
 #include <cstdint>

 #include "defines.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/core/dictionary/property/ngram_property.h"
 #include "suggest/core/dictionary/property/unigram_property.h"
 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"

 namespace latinime {
@@ -53,15 +53,13 @@ class ProbabilityEntry {
                     unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                     unigramProperty->isPossiblyOffensive())),
               mProbability(unigramProperty->getProbability()),
-              mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
-                      unigramProperty->getCount()) {}
+              mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}

     // Create from ngram property.
     // TODO: Set flags.
     ProbabilityEntry(const NgramProperty *const ngramProperty)
             : mFlags(0), mProbability(ngramProperty->getProbability()),
-              mHistoricalInfo(ngramProperty->getTimestamp(), ngramProperty->getLevel(),
-                      ngramProperty->getCount()) {}
+              mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}

     bool isValid() const {
         return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
@@ -103,7 +101,7 @@ class ProbabilityEntry {
         uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
         if (hasHistoricalInfo) {
             encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
-                    ^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
+                    ^ static_cast<uint64_t>(mHistoricalInfo.getTimestamp());
             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
                     ^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
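
The encode path above packs flags, timestamp, level, and count into one uint64_t by shifting the accumulated value left by each field's width and XOR-ing the next field in. The sketch below shows the idea in isolation; the field widths are assumptions chosen for illustration (the real widths come from Ver4DictConstants), and the decode helpers just undo the shifts.

#include <cassert>
#include <climits>
#include <cstdint>

// Field widths in bytes, assumed for illustration; the dictionary code takes them
// from Ver4DictConstants (TIME_STAMP_FIELD_SIZE and friends).
static const int TIME_STAMP_FIELD_SIZE = 4;
static const int WORD_LEVEL_FIELD_SIZE = 1;
static const int WORD_COUNT_FIELD_SIZE = 1;

// Packs the fields as [flags][timestamp][level][count], most significant first.
static uint64_t encode(const uint8_t flags, const uint32_t timestamp, const uint8_t level,
        const uint8_t count) {
    uint64_t encodedEntry = static_cast<uint64_t>(flags);
    encodedEntry = (encodedEntry << (TIME_STAMP_FIELD_SIZE * CHAR_BIT)) ^ timestamp;
    encodedEntry = (encodedEntry << (WORD_LEVEL_FIELD_SIZE * CHAR_BIT)) ^ level;
    encodedEntry = (encodedEntry << (WORD_COUNT_FIELD_SIZE * CHAR_BIT)) ^ count;
    return encodedEntry;
}

static uint32_t decodeCount(const uint64_t encodedEntry) {
    return static_cast<uint32_t>(
            encodedEntry & ((1ull << (WORD_COUNT_FIELD_SIZE * CHAR_BIT)) - 1));
}

static uint32_t decodeTimestamp(const uint64_t encodedEntry) {
    const int shift = (WORD_LEVEL_FIELD_SIZE + WORD_COUNT_FIELD_SIZE) * CHAR_BIT;
    return static_cast<uint32_t>(
            (encodedEntry >> shift) & ((1ull << (TIME_STAMP_FIELD_SIZE * CHAR_BIT)) - 1));
}

int main() {
    const uint64_t encoded = encode(0x01 /* flags */, 1412124000u /* timestamp */,
            3 /* level */, 1 /* count */);
    assert(decodeCount(encoded) == 1u);
    assert(decodeTimestamp(encoded) == 1412124000u);
    return 0;
}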

View file

@@ -302,7 +302,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
         const UnigramProperty beginningOfSentenceUnigramProperty(
                 true /* representsBeginningOfSentence */, true /* isNotAWord */,
                 false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
+                HistoricalInfo(), &shortcuts);
         if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                 &beginningOfSentenceUnigramProperty)) {
             AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@@ -464,8 +464,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
                         ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
                 probabilityEntry.getProbability();
         ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
-                probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
-                historicalInfo->getCount());
+                probability, *historicalInfo);
     }
     // Fetch shortcut information.
     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
@@ -487,8 +486,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
             probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
-            probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
-            historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
+            probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

View file

@@ -43,7 +43,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
 /* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
         const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
         const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
-    const int timestamp = newHistoricalInfo->getTimeStamp();
+    const int timestamp = newHistoricalInfo->getTimestamp();
     if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
         // Add entry as a valid word.
         const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
@@ -78,7 +78,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
 /* static */ int ForgettingCurveUtils::decodeProbability(
         const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
-    const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
+    const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(),
             headerPolicy->getForgettingCurveDurationToLevelDown());
     return sProbabilityTable.getProbability(
             headerPolicy->getForgettingCurveProbabilityValuesTableId(),
@@ -102,7 +102,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
 /* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
         const HeaderPolicy *const headerPolicy) {
     return historicalInfo->getLevel() > 0
-            || getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
+            || getElapsedTimeStepCount(historicalInfo->getTimestamp(),
                     headerPolicy->getForgettingCurveDurationToLevelDown())
                             < DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
 }
@@ -110,12 +110,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
 /* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
         const HistoricalInfo *const originalHistoricalInfo,
         const HeaderPolicy *const headerPolicy) {
-    if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
+    if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) {
         return HistoricalInfo();
     }
     const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
     const int elapsedTimeStep = getElapsedTimeStepCount(
-            originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds);
+            originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds);
     if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
         // No need to update historical info.
         return *originalHistoricalInfo;
@@ -124,7 +124,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
     const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
     const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
             originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
-    const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() +
+    const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimestamp() +
             levelDownAmount * durationToLevelDownInSeconds;
     return HistoricalInfo(adjustedTimestampInSeconds,
             originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
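
The level-down arithmetic in createHistoricalInfoToSave is terse, so here is a standalone sketch with a worked example. MAX_ELAPSED_TIME_STEP_COUNT below is an assumed value for illustration (the real constant lives in ForgettingCurveUtils), and the elapsed step count is passed in directly instead of being derived from the current time, but the formula mirrors the hunk above: past the threshold, the entry loses up to one level per additional window of elapsed steps, its timestamp is advanced to credit the levels it kept, and its count is reset.

#include <cassert>

// Assumed for illustration; the real constant is defined by ForgettingCurveUtils.
static const int MAX_ELAPSED_TIME_STEP_COUNT = 15;

struct SimpleHistoricalInfo {
    int timestamp;
    int level;
    int count;
};

// Mirrors the tail of ForgettingCurveUtils::createHistoricalInfoToSave shown above.
static SimpleHistoricalInfo levelDown(const SimpleHistoricalInfo &original,
        const int elapsedTimeStep, const int durationToLevelDownInSeconds) {
    if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
        // Recent enough: keep the historical info unchanged.
        return original;
    }
    // One level is lost per full window of (MAX_ELAPSED_TIME_STEP_COUNT + 1) steps,
    // but never more levels than the entry actually has.
    const int maxLevelDownAmount = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
    const int levelDownAmount = (maxLevelDownAmount >= original.level)
            ? original.level : maxLevelDownAmount;
    // Advance the timestamp by the time accounted for by the dropped levels; reset the count.
    return SimpleHistoricalInfo{
            original.timestamp + levelDownAmount * durationToLevelDownInSeconds,
            original.level - levelDownAmount,
            0 /* count */};
}

int main() {
    const SimpleHistoricalInfo original{1000000 /* timestamp */, 2 /* level */, 5 /* count */};
    // 20 elapsed steps against a 16-step window: drop exactly one level.
    const SimpleHistoricalInfo saved = levelDown(original, 20, 86400 /* seconds */);
    assert(saved.level == 1);
    assert(saved.timestamp == 1000000 + 86400);
    assert(saved.count == 0);
    return 0;
}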

View file

@@ -20,7 +20,7 @@
 #include <vector>

 #include "defines.h"
-#include "suggest/policyimpl/dictionary/utils/historical_info.h"
+#include "suggest/core/dictionary/property/historical_info.h"

 namespace latinime {

View file

@@ -60,7 +60,7 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
     languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
     const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId);
     EXPECT_EQ(flag, entry.getFlags());
-    EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
+    EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimestamp());
     EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
     EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());

View file

@@ -51,7 +51,7 @@ TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
             ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
     EXPECT_EQ(flag, decodedEntry.getFlags());
-    EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimeStamp());
+    EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimestamp());
     EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
     EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
 }