Merge "Move HistoricalInfo to property and use it in *Property."
commit
841aa0b6f8
|
@ -373,7 +373,8 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputted.
|
// Use 1 for count to indicate the word has inputted.
|
||||||
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
|
||||||
isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
|
isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
|
||||||
|
&shortcuts);
|
||||||
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
|
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
|
||||||
&unigramProperty);
|
&unigramProperty);
|
||||||
}
|
}
|
||||||
|
@ -405,7 +406,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||||
// Use 1 for count to indicate the ngram has inputted.
|
// Use 1 for count to indicate the ngram has inputted.
|
||||||
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||||
probability, timestamp, 0 /* level */, 1 /* count */);
|
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||||
return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -494,8 +495,8 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
}
|
}
|
||||||
// Use 1 for count to indicate the word has inputted.
|
// Use 1 for count to indicate the word has inputted.
|
||||||
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
|
||||||
isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
|
isBlacklisted, unigramProbability,
|
||||||
&shortcuts);
|
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
|
||||||
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
|
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
|
||||||
&unigramProperty);
|
&unigramProperty);
|
||||||
if (word0) {
|
if (word0) {
|
||||||
|
@ -503,7 +504,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
// Use 1 for count to indicate the bigram has inputted.
|
// Use 1 for count to indicate the bigram has inputted.
|
||||||
const NgramProperty ngramProperty(
|
const NgramProperty ngramProperty(
|
||||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||||
bigramProbability, timestamp, 0 /* level */, 1 /* count */);
|
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
|
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
|
||||||
false /* isBeginningOfSentence */);
|
false /* isBeginningOfSentence */);
|
||||||
dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
||||||
|
|
|
@ -34,7 +34,7 @@ class HistoricalInfo {
|
||||||
return mTimestamp != NOT_A_TIMESTAMP;
|
return mTimestamp != NOT_A_TIMESTAMP;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getTimeStamp() const {
|
int getTimestamp() const {
|
||||||
return mTimestamp;
|
return mTimestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,12 +47,12 @@ class HistoricalInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Copy constructor is public to use this class as a type of return value.
|
// Default copy constructor and assign operator are used for using in std::vector.
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(HistoricalInfo);
|
|
||||||
|
|
||||||
const int mTimestamp;
|
// TODO: Make members const.
|
||||||
const int mLevel;
|
int mTimestamp;
|
||||||
const int mCount;
|
int mLevel;
|
||||||
|
int mCount;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_HISTORICAL_INFO_H */
|
#endif /* LATINIME_HISTORICAL_INFO_H */
|
|
@ -20,15 +20,16 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class NgramProperty {
|
class NgramProperty {
|
||||||
public:
|
public:
|
||||||
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
|
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
|
||||||
const int timestamp, const int level, const int count)
|
const HistoricalInfo &historicalInfo)
|
||||||
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
|
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count) {}
|
mHistoricalInfo(historicalInfo) {}
|
||||||
|
|
||||||
const std::vector<int> *getTargetCodePoints() const {
|
const std::vector<int> *getTargetCodePoints() const {
|
||||||
return &mTargetCodePoints;
|
return &mTargetCodePoints;
|
||||||
|
@ -38,16 +39,8 @@ class NgramProperty {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getTimestamp() const {
|
const HistoricalInfo getHistoricalInfo() const {
|
||||||
return mTimestamp;
|
return mHistoricalInfo;
|
||||||
}
|
|
||||||
|
|
||||||
int getLevel() const {
|
|
||||||
return mLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getCount() const {
|
|
||||||
return mCount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -57,9 +50,7 @@ class NgramProperty {
|
||||||
// TODO: Make members const.
|
// TODO: Make members const.
|
||||||
std::vector<int> mTargetCodePoints;
|
std::vector<int> mTargetCodePoints;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mTimestamp;
|
HistoricalInfo mHistoricalInfo;
|
||||||
int mLevel;
|
|
||||||
int mCount;
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_NGRAM_PROPERTY_H
|
#endif // LATINIME_NGRAM_PROPERTY_H
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -50,15 +51,14 @@ class UnigramProperty {
|
||||||
|
|
||||||
UnigramProperty()
|
UnigramProperty()
|
||||||
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
|
: mRepresentsBeginningOfSentence(false), mIsNotAWord(false), mIsBlacklisted(false),
|
||||||
mProbability(NOT_A_PROBABILITY), mTimestamp(NOT_A_TIMESTAMP), mLevel(0), mCount(0),
|
mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
|
||||||
mShortcuts() {}
|
|
||||||
|
|
||||||
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
|
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
|
||||||
const bool isBlacklisted, const int probability, const int timestamp, const int level,
|
const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
|
||||||
const int count, const std::vector<ShortcutProperty> *const shortcuts)
|
const std::vector<ShortcutProperty> *const shortcuts)
|
||||||
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
|
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
|
||||||
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
|
||||||
mTimestamp(timestamp), mLevel(level), mCount(count), mShortcuts(*shortcuts) {}
|
mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}
|
||||||
|
|
||||||
bool representsBeginningOfSentence() const {
|
bool representsBeginningOfSentence() const {
|
||||||
return mRepresentsBeginningOfSentence;
|
return mRepresentsBeginningOfSentence;
|
||||||
|
@ -85,16 +85,8 @@ class UnigramProperty {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getTimestamp() const {
|
const HistoricalInfo getHistoricalInfo() const {
|
||||||
return mTimestamp;
|
return mHistoricalInfo;
|
||||||
}
|
|
||||||
|
|
||||||
int getLevel() const {
|
|
||||||
return mLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getCount() const {
|
|
||||||
return mCount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<ShortcutProperty> &getShortcuts() const {
|
const std::vector<ShortcutProperty> &getShortcuts() const {
|
||||||
|
@ -110,10 +102,7 @@ class UnigramProperty {
|
||||||
bool mIsNotAWord;
|
bool mIsNotAWord;
|
||||||
bool mIsBlacklisted;
|
bool mIsBlacklisted;
|
||||||
int mProbability;
|
int mProbability;
|
||||||
// Historical information
|
HistoricalInfo mHistoricalInfo;
|
||||||
int mTimestamp;
|
|
||||||
int mLevel;
|
|
||||||
int mCount;
|
|
||||||
std::vector<ShortcutProperty> mShortcuts;
|
std::vector<ShortcutProperty> mShortcuts;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
|
||||||
#include "utils/jni_data_utils.h"
|
#include "utils/jni_data_utils.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -31,8 +32,9 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
!mNgrams.empty(), mUnigramProperty.hasShortcuts(),
|
!mNgrams.empty(), mUnigramProperty.hasShortcuts(),
|
||||||
mUnigramProperty.representsBeginningOfSentence()};
|
mUnigramProperty.representsBeginningOfSentence()};
|
||||||
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
|
env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
|
||||||
int probabilityInfo[] = {mUnigramProperty.getProbability(), mUnigramProperty.getTimestamp(),
|
const HistoricalInfo &historicalInfo = mUnigramProperty.getHistoricalInfo();
|
||||||
mUnigramProperty.getLevel(), mUnigramProperty.getCount()};
|
int probabilityInfo[] = {mUnigramProperty.getProbability(), historicalInfo.getTimestamp(),
|
||||||
|
historicalInfo.getLevel(), historicalInfo.getCount()};
|
||||||
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
|
env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
|
||||||
probabilityInfo);
|
probabilityInfo);
|
||||||
|
|
||||||
|
@ -51,10 +53,10 @@ void WordProperty::outputProperties(JNIEnv *const env, jintArray outCodePoints,
|
||||||
false /* needsNullTermination */);
|
false /* needsNullTermination */);
|
||||||
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
env->CallBooleanMethod(outBigramTargets, addMethodId, bigramWord1CodePointArray);
|
||||||
env->DeleteLocalRef(bigramWord1CodePointArray);
|
env->DeleteLocalRef(bigramWord1CodePointArray);
|
||||||
|
const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
|
||||||
int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
|
int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
|
||||||
ngramProperty.getTimestamp(), ngramProperty.getLevel(),
|
ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
|
||||||
ngramProperty.getCount()};
|
ngramHistoricalInfo.getCount()};
|
||||||
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
|
||||||
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
|
||||||
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
|
||||||
|
|
|
@ -267,8 +267,7 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
const NgramProperty *const ngramProperty) const {
|
const NgramProperty *const ngramProperty) const {
|
||||||
// TODO: Consolidate historical info and probability.
|
// TODO: Consolidate historical info and probability.
|
||||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
const HistoricalInfo historicalInfoForUpdate(ngramProperty->getTimestamp(),
|
const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
|
||||||
ngramProperty->getLevel(), ngramProperty->getCount());
|
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
|
originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
|
||||||
|
|
|
@ -83,10 +83,10 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||||
}
|
}
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
||||||
historicalInfo->getTimeStamp());
|
historicalInfo->getTimestamp());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
||||||
|
|
|
@ -25,8 +25,8 @@
|
||||||
#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
|
#define LATINIME_BACKWARD_V402_BIGRAM_ENTRY_H
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
namespace backward {
|
namespace backward {
|
||||||
|
|
|
@ -147,7 +147,7 @@ bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilit
|
||||||
}
|
}
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
|
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
|
||||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
|
||||||
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
|
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -25,8 +25,8 @@
|
||||||
#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
|
#define LATINIME_BACKWARD_V402_PROBABILITY_ENTRY_H
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
namespace backward {
|
namespace backward {
|
||||||
|
|
|
@ -396,8 +396,7 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
||||||
const UnigramProperty *const unigramProperty) const {
|
const UnigramProperty *const unigramProperty) const {
|
||||||
// TODO: Consolidate historical info and probability.
|
// TODO: Consolidate historical info and probability.
|
||||||
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
|
||||||
const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
|
const HistoricalInfo &historicalInfoForUpdate = unigramProperty->getHistoricalInfo();
|
||||||
unigramProperty->getLevel(), unigramProperty->getCount());
|
|
||||||
const HistoricalInfo updatedHistoricalInfo =
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
originalProbabilityEntry->getHistoricalInfo(),
|
originalProbabilityEntry->getHistoricalInfo(),
|
||||||
|
|
|
@ -343,7 +343,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
HistoricalInfo(), &shortcuts);
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
|
@ -528,8 +528,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
bigramEntry.getProbability();
|
bigramEntry.getProbability();
|
||||||
ngrams.emplace_back(
|
ngrams.emplace_back(
|
||||||
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
probability, *historicalInfo);
|
||||||
historicalInfo->getCount());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
|
@ -552,8 +551,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
*historicalInfo, &shortcuts);
|
||||||
historicalInfo->getCount(), &shortcuts);
|
|
||||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -216,7 +216,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
||||||
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||||
probabilityEntry.getProbability();
|
probabilityEntry.getProbability();
|
||||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
priorityQueue.push(DictProbability(terminalPos, probability,
|
||||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
probabilityEntry.getHistoricalInfo()->getTimestamp()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete unigrams.
|
// Delete unigrams.
|
||||||
|
@ -263,7 +263,7 @@ bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
||||||
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||||
bigramEntry.getProbability();
|
bigramEntry.getProbability();
|
||||||
priorityQueue.push(DictProbability(entryPos, probability,
|
priorityQueue.push(DictProbability(entryPos, probability,
|
||||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
bigramEntry.getHistoricalInfo()->getTimestamp()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -452,7 +452,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
||||||
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
|
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
|
||||||
ngrams.emplace_back(
|
ngrams.emplace_back(
|
||||||
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
|
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
|
||||||
probability, NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
|
probability, HistoricalInfo());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
|
@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
|
||||||
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
HistoricalInfo(), &shortcuts);
|
||||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -296,7 +296,7 @@ bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPoli
|
||||||
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
|
ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
|
||||||
headerPolicy) : probabilityEntry.getProbability();
|
headerPolicy) : probabilityEntry.getProbability();
|
||||||
outEntryInfo->emplace_back(probability,
|
outEntryInfo->emplace_back(probability,
|
||||||
probabilityEntry.getHistoricalInfo()->getTimeStamp(),
|
probabilityEntry.getHistoricalInfo()->getTimestamp(),
|
||||||
entry.key(), targetLevel, prevWordIds->data());
|
entry.key(), targetLevel, prevWordIds->data());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -21,10 +21,10 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
#include "suggest/core/dictionary/property/ngram_property.h"
|
#include "suggest/core/dictionary/property/ngram_property.h"
|
||||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -53,15 +53,13 @@ class ProbabilityEntry {
|
||||||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
unigramProperty->isPossiblyOffensive())),
|
unigramProperty->isPossiblyOffensive())),
|
||||||
mProbability(unigramProperty->getProbability()),
|
mProbability(unigramProperty->getProbability()),
|
||||||
mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
|
mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
|
||||||
unigramProperty->getCount()) {}
|
|
||||||
|
|
||||||
// Create from ngram property.
|
// Create from ngram property.
|
||||||
// TODO: Set flags.
|
// TODO: Set flags.
|
||||||
ProbabilityEntry(const NgramProperty *const ngramProperty)
|
ProbabilityEntry(const NgramProperty *const ngramProperty)
|
||||||
: mFlags(0), mProbability(ngramProperty->getProbability()),
|
: mFlags(0), mProbability(ngramProperty->getProbability()),
|
||||||
mHistoricalInfo(ngramProperty->getTimestamp(), ngramProperty->getLevel(),
|
mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
|
||||||
ngramProperty->getCount()) {}
|
|
||||||
|
|
||||||
bool isValid() const {
|
bool isValid() const {
|
||||||
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
|
return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
|
||||||
|
@ -103,7 +101,7 @@ class ProbabilityEntry {
|
||||||
uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
|
uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
|
||||||
if (hasHistoricalInfo) {
|
if (hasHistoricalInfo) {
|
||||||
encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
|
encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
|
||||||
^ static_cast<uint64_t>(mHistoricalInfo.getTimeStamp());
|
^ static_cast<uint64_t>(mHistoricalInfo.getTimestamp());
|
||||||
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
|
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
|
||||||
^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
|
^ static_cast<uint64_t>(mHistoricalInfo.getLevel());
|
||||||
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
|
encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
|
||||||
|
|
|
@ -302,7 +302,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
|
||||||
NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
|
HistoricalInfo(), &shortcuts);
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
|
@ -464,8 +464,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
||||||
probabilityEntry.getProbability();
|
probabilityEntry.getProbability();
|
||||||
ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||||
probability, historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
|
probability, *historicalInfo);
|
||||||
historicalInfo->getCount());
|
|
||||||
}
|
}
|
||||||
// Fetch shortcut information.
|
// Fetch shortcut information.
|
||||||
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
|
||||||
|
@ -487,8 +486,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
||||||
}
|
}
|
||||||
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
|
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
|
||||||
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
|
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
|
||||||
probabilityEntry.getProbability(), historicalInfo->getTimeStamp(),
|
probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
|
||||||
historicalInfo->getLevel(), historicalInfo->getCount(), &shortcuts);
|
|
||||||
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfo(
|
||||||
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||||
const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
|
const HistoricalInfo *const newHistoricalInfo, const HeaderPolicy *const headerPolicy) {
|
||||||
const int timestamp = newHistoricalInfo->getTimeStamp();
|
const int timestamp = newHistoricalInfo->getTimestamp();
|
||||||
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||||
// Add entry as a valid word.
|
// Add entry as a valid word.
|
||||||
const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
|
const int level = clampToVisibleEntryLevelRange(newHistoricalInfo->getLevel());
|
||||||
|
@ -78,7 +78,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::decodeProbability(
|
/* static */ int ForgettingCurveUtils::decodeProbability(
|
||||||
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
|
const HistoricalInfo *const historicalInfo, const HeaderPolicy *const headerPolicy) {
|
||||||
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
|
const int elapsedTimeStepCount = getElapsedTimeStepCount(historicalInfo->getTimestamp(),
|
||||||
headerPolicy->getForgettingCurveDurationToLevelDown());
|
headerPolicy->getForgettingCurveDurationToLevelDown());
|
||||||
return sProbabilityTable.getProbability(
|
return sProbabilityTable.getProbability(
|
||||||
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
|
headerPolicy->getForgettingCurveProbabilityValuesTableId(),
|
||||||
|
@ -102,7 +102,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
|
/* static */ bool ForgettingCurveUtils::needsToKeep(const HistoricalInfo *const historicalInfo,
|
||||||
const HeaderPolicy *const headerPolicy) {
|
const HeaderPolicy *const headerPolicy) {
|
||||||
return historicalInfo->getLevel() > 0
|
return historicalInfo->getLevel() > 0
|
||||||
|| getElapsedTimeStepCount(historicalInfo->getTimeStamp(),
|
|| getElapsedTimeStepCount(historicalInfo->getTimestamp(),
|
||||||
headerPolicy->getForgettingCurveDurationToLevelDown())
|
headerPolicy->getForgettingCurveDurationToLevelDown())
|
||||||
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
< DISCARD_LEVEL_ZERO_ENTRY_TIME_STEP_COUNT_THRESHOLD;
|
||||||
}
|
}
|
||||||
|
@ -110,12 +110,12 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
/* static */ const HistoricalInfo ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||||
const HistoricalInfo *const originalHistoricalInfo,
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
const HeaderPolicy *const headerPolicy) {
|
const HeaderPolicy *const headerPolicy) {
|
||||||
if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
if (originalHistoricalInfo->getTimestamp() == NOT_A_TIMESTAMP) {
|
||||||
return HistoricalInfo();
|
return HistoricalInfo();
|
||||||
}
|
}
|
||||||
const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
|
const int durationToLevelDownInSeconds = headerPolicy->getForgettingCurveDurationToLevelDown();
|
||||||
const int elapsedTimeStep = getElapsedTimeStepCount(
|
const int elapsedTimeStep = getElapsedTimeStepCount(
|
||||||
originalHistoricalInfo->getTimeStamp(), durationToLevelDownInSeconds);
|
originalHistoricalInfo->getTimestamp(), durationToLevelDownInSeconds);
|
||||||
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
|
if (elapsedTimeStep <= MAX_ELAPSED_TIME_STEP_COUNT) {
|
||||||
// No need to update historical info.
|
// No need to update historical info.
|
||||||
return *originalHistoricalInfo;
|
return *originalHistoricalInfo;
|
||||||
|
@ -124,7 +124,7 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
||||||
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
const int maxLevelDownAmonut = elapsedTimeStep / (MAX_ELAPSED_TIME_STEP_COUNT + 1);
|
||||||
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
|
const int levelDownAmount = (maxLevelDownAmonut >= originalHistoricalInfo->getLevel()) ?
|
||||||
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
|
originalHistoricalInfo->getLevel() : maxLevelDownAmonut;
|
||||||
const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimeStamp() +
|
const int adjustedTimestampInSeconds = originalHistoricalInfo->getTimestamp() +
|
||||||
levelDownAmount * durationToLevelDownInSeconds;
|
levelDownAmount * durationToLevelDownInSeconds;
|
||||||
return HistoricalInfo(adjustedTimestampInSeconds,
|
return HistoricalInfo(adjustedTimestampInSeconds,
|
||||||
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
|
originalHistoricalInfo->getLevel() - levelDownAmount, 0 /* count */);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
#include "suggest/core/dictionary/property/historical_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ TEST(LanguageModelDictContentTest, TestUnigramProbabilityWithHistoricalInfo) {
|
||||||
languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
|
languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry);
|
||||||
const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId);
|
const ProbabilityEntry entry = languageModelDictContent.getProbabilityEntry(wordId);
|
||||||
EXPECT_EQ(flag, entry.getFlags());
|
EXPECT_EQ(flag, entry.getFlags());
|
||||||
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimeStamp());
|
EXPECT_EQ(timestamp, entry.getHistoricalInfo()->getTimestamp());
|
||||||
EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
|
EXPECT_EQ(level, entry.getHistoricalInfo()->getLevel());
|
||||||
EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
|
EXPECT_EQ(count, entry.getHistoricalInfo()->getCount());
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ TEST(ProbabilityEntryTest, TestEncodeDecodeWithHistoricalInfo) {
|
||||||
ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
|
ProbabilityEntry::decode(encodedEntry, true /* hasHistoricalInfo */);
|
||||||
|
|
||||||
EXPECT_EQ(flag, decodedEntry.getFlags());
|
EXPECT_EQ(flag, decodedEntry.getFlags());
|
||||||
EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimeStamp());
|
EXPECT_EQ(timestamp, decodedEntry.getHistoricalInfo()->getTimestamp());
|
||||||
EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
|
EXPECT_EQ(level, decodedEntry.getHistoricalInfo()->getLevel());
|
||||||
EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
|
EXPECT_EQ(count, decodedEntry.getHistoricalInfo()->getCount());
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue