Merge "Introduce EntryCounters to count entries in a dictionary."
This commit is contained in:
commit
2b087f9a12
16 changed files with 230 additions and 89 deletions
|
@ -30,6 +30,7 @@ const char *const HeaderPolicy::DATE_KEY = "date";
|
|||
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
|
||||
const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::TRIGRAM_COUNT_KEY = "TRIGRAM_COUNT";
|
||||
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
|
||||
// Historical info is information that is needed to support decaying such as timestamp, level and
|
||||
// count.
|
||||
|
@ -94,12 +95,11 @@ bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
|
|||
}
|
||||
|
||||
bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
|
||||
const int unigramCount, const int bigramCount,
|
||||
const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const {
|
||||
const EntryCounts &entryCounts, const int extendedRegionSize,
|
||||
BufferWithExtendableBuffer *const outBuffer) const {
|
||||
int writingPos = 0;
|
||||
DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap);
|
||||
fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
|
||||
extendedRegionSize, &attributeMapToWrite);
|
||||
fillInHeader(updatesLastDecayedTime, entryCounts, extendedRegionSize, &attributeMapToWrite);
|
||||
if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
|
@ -126,11 +126,15 @@ bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim
|
|||
return true;
|
||||
}
|
||||
|
||||
void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
|
||||
const int bigramCount, const int extendedRegionSize,
|
||||
void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime,
|
||||
const EntryCounts &entryCounts, const int extendedRegionSize,
|
||||
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY,
|
||||
entryCounts.getUnigramCount());
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY,
|
||||
entryCounts.getBigramCount());
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, TRIGRAM_COUNT_KEY,
|
||||
entryCounts.getTrigramCount());
|
||||
HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
|
||||
extendedRegionSize);
|
||||
// Set the current time as the generation time.
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "utils/char_utils.h"
|
||||
#include "utils/time_keeper.h"
|
||||
|
@ -49,6 +50,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
|
||||
mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
|
||||
mTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
TRIGRAM_COUNT_KEY, 0 /* defaultValue */)),
|
||||
mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)),
|
||||
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
|
||||
|
@ -60,6 +63,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
|
||||
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
|
||||
mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
&mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
|
||||
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
|
||||
|
||||
// Constructs header information using an attribute map.
|
||||
|
@ -77,7 +82,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
|
||||
mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
|
||||
DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)),
|
||||
mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0),
|
||||
mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0),
|
||||
mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
|
||||
&mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)),
|
||||
mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
|
@ -87,6 +92,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
&mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
|
||||
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
|
||||
mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
&mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)),
|
||||
mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}
|
||||
|
||||
// Copy header information
|
||||
|
@ -99,12 +106,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
mIsDecayingDict(headerPolicy->mIsDecayingDict),
|
||||
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
|
||||
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
|
||||
mTrigramCount(headerPolicy->mTrigramCount),
|
||||
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
|
||||
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
|
||||
mForgettingCurveProbabilityValuesTableId(
|
||||
headerPolicy->mForgettingCurveProbabilityValuesTableId),
|
||||
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
|
||||
mMaxBigramCount(headerPolicy->mMaxBigramCount),
|
||||
mMaxTrigramCount(headerPolicy->mMaxTrigramCount),
|
||||
mCodePointTable(headerPolicy->mCodePointTable) {}
|
||||
|
||||
// Temporary dummy header.
|
||||
|
@ -112,10 +121,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
|
||||
mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f),
|
||||
mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false),
|
||||
mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
|
||||
mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0),
|
||||
mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
|
||||
mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0),
|
||||
mCodePointTable(nullptr) {}
|
||||
mMaxTrigramCount(0), mCodePointTable(nullptr) {}
|
||||
|
||||
~HeaderPolicy() {}
|
||||
|
||||
|
@ -183,6 +192,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
return mBigramCount;
|
||||
}
|
||||
|
||||
// Returns the trigram entry count stored in this header (mTrigramCount).
AK_FORCE_INLINE int getTrigramCount() const {
|
||||
return mTrigramCount;
|
||||
}
|
||||
|
||||
// Returns the extended region size stored in this header (mExtendedRegionSize).
AK_FORCE_INLINE int getExtendedRegionSize() const {
|
||||
return mExtendedRegionSize;
|
||||
}
|
||||
|
@ -212,15 +225,19 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
return mMaxBigramCount;
|
||||
}
|
||||
|
||||
// Returns the maximum trigram count stored in this header (mMaxTrigramCount).
AK_FORCE_INLINE int getMaxTrigramCount() const {
|
||||
return mMaxTrigramCount;
|
||||
}
|
||||
|
||||
void readHeaderValueOrQuestionMark(const char *const key,
|
||||
int *outValue, int outValueSize) const;
|
||||
|
||||
bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
|
||||
const int unigramCount, const int bigramCount,
|
||||
const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const;
|
||||
const EntryCounts &entryCounts, const int extendedRegionSize,
|
||||
BufferWithExtendableBuffer *const outBuffer) const;
|
||||
|
||||
void fillInHeader(const bool updatesLastDecayedTime,
|
||||
const int unigramCount, const int bigramCount, const int extendedRegionSize,
|
||||
void fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts,
|
||||
const int extendedRegionSize,
|
||||
DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const;
|
||||
|
||||
AK_FORCE_INLINE const std::vector<int> *getLocale() const {
|
||||
|
@ -245,6 +262,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
static const char *const LAST_DECAYED_TIME_KEY;
|
||||
static const char *const UNIGRAM_COUNT_KEY;
|
||||
static const char *const BIGRAM_COUNT_KEY;
|
||||
static const char *const TRIGRAM_COUNT_KEY;
|
||||
static const char *const EXTENDED_REGION_SIZE_KEY;
|
||||
static const char *const HAS_HISTORICAL_INFO_KEY;
|
||||
static const char *const LOCALE_KEY;
|
||||
|
@ -273,11 +291,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
const int mLastDecayedTime;
|
||||
const int mUnigramCount;
|
||||
const int mBigramCount;
|
||||
const int mTrigramCount;
|
||||
const int mExtendedRegionSize;
|
||||
const bool mHasHistoricalInfoOfWords;
|
||||
const int mForgettingCurveProbabilityValuesTableId;
|
||||
const int mMaxUnigramCount;
|
||||
const int mMaxBigramCount;
|
||||
const int mMaxTrigramCount;
|
||||
const int *const mCodePointTable;
|
||||
|
||||
const std::vector<int> readLocale() const;
|
||||
|
|
|
@ -303,7 +303,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
|||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
|
||||
&addedNewUnigram)) {
|
||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||
mUnigramCount++;
|
||||
mEntryCounters.incrementUnigramCount();
|
||||
}
|
||||
if (unigramProperty->getShortcuts().size() > 0) {
|
||||
// Add shortcut target.
|
||||
|
@ -397,7 +397,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
|
|||
if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos),
|
||||
wordPos, ngramProperty, &addedNewBigram)) {
|
||||
if (addedNewBigram) {
|
||||
mBigramCount++;
|
||||
mEntryCounters.incrementBigramCount();
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
|
@ -438,7 +438,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon
|
|||
const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
|
||||
if (mUpdatingHelper.removeNgramEntry(
|
||||
PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) {
|
||||
mBigramCount--;
|
||||
mEntryCounters.decrementBigramCount();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -477,7 +477,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
|||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||
return false;
|
||||
}
|
||||
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
|
||||
if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
|
||||
AKLOGE("Cannot flush the dictionary to file.");
|
||||
mIsCorrupted = true;
|
||||
return false;
|
||||
|
@ -515,7 +515,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
|||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy->isDecayingDict()) {
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
|
||||
mHeaderPolicy);
|
||||
}
|
||||
return false;
|
||||
|
@ -525,19 +525,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
|||
char *const outResult, const int maxResultLength) {
|
||||
const int compareLength = queryLength + 1 /* terminator */;
|
||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||
snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount());
|
||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||
snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount());
|
||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getUnigramCountHardLimit(
|
||||
ForgettingCurveUtils::getEntryCountHardLimit(
|
||||
mHeaderPolicy->getMaxUnigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getBigramCountHardLimit(
|
||||
ForgettingCurveUtils::getEntryCountHardLimit(
|
||||
mHeaderPolicy->getMaxBigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -75,8 +76,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy->getBigramCount()),
|
||||
mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(),
|
||||
mHeaderPolicy->getTrigramCount()),
|
||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
||||
|
||||
virtual int getRootPosition() const {
|
||||
|
@ -163,8 +164,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
Ver4PatriciaTrieNodeWriter mNodeWriter;
|
||||
DynamicPtUpdatingHelper mUpdatingHelper;
|
||||
Ver4PatriciaTrieWritingHelper mWritingHelper;
|
||||
int mUnigramCount;
|
||||
int mBigramCount;
|
||||
MutableEntryCounters mEntryCounters;
|
||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||
mutable bool mIsCorrupted;
|
||||
|
||||
|
|
|
@ -43,18 +43,18 @@ namespace backward {
|
|||
namespace v402 {
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
||||
const int unigramCount, const int bigramCount) const {
|
||||
const EntryCounts &entryCounts) const {
|
||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
|
||||
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
|
||||
// Write the header, carrying the current entry counts and extended region size, into
// headerBuffer. On failure, log the values that were being written and report the failure to
// the caller.
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
        entryCounts, extendedRegionSize, &headerBuffer)) {
    // The counts are read from the entryCounts parameter; no other counter object is in
    // scope in this function.
    AKLOGE("Cannot write header structure to buffer. "
            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
            "extendedRegionSize: %d", false, entryCounts.getUnigramCount(),
            entryCounts.getBigramCount(), extendedRegionSize);
    return false;
}
|
||||
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||
|
@ -74,7 +74,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
|
|||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
||||
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
|
||||
EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */),
|
||||
0 /* extendedRegionSize */, &headerBuffer)) {
|
||||
return false;
|
||||
}
|
||||
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
|
@ -46,8 +47,7 @@ class Ver4PatriciaTrieWritingHelper {
|
|||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||
: mBuffers(buffers) {}
|
||||
|
||||
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
|
||||
const int bigramCount) const;
|
||||
bool writeToDictFile(const char *const dictDirPath, const EntryCounts &entryCounts) const;
|
||||
|
||||
// This method cannot be const because the original dictionary buffer will be updated to detect
|
||||
// useless PtNodes during GC.
|
||||
|
|
|
@ -161,10 +161,7 @@ bool LanguageModelDictContent::truncateEntries(const int *const entryCounts,
|
|||
|
||||
bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds,
|
||||
const int wordId, const bool isValid, const HistoricalInfo historicalInfo,
|
||||
const HeaderPolicy *const headerPolicy, int *const outAddedNewNgramEntryCount) {
|
||||
if (outAddedNewNgramEntryCount) {
|
||||
*outAddedNewNgramEntryCount = 0;
|
||||
}
|
||||
const HeaderPolicy *const headerPolicy, MutableEntryCounters *const entryCountersToUpdate) {
|
||||
if (!mHasHistoricalInfo) {
|
||||
AKLOGE("updateAllEntriesOnInputWord is called for dictionary without historical info.");
|
||||
return false;
|
||||
|
@ -188,8 +185,8 @@ bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView
|
|||
if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
|
||||
return false;
|
||||
}
|
||||
// The entry read before the update was not valid, so this n-gram is presumably new and is
// counted. i + 2 is the n-gram order passed to the counters (i comes from the enclosing loop,
// which is not shown here -- TODO confirm it is the previous-word index).
// The counter pointer is treated as optional: a null pointer skips counting instead of being
// dereferenced.
if (!originalNgramProbabilityEntry.isValid() && entryCountersToUpdate) {
    entryCountersToUpdate->incrementNgramCount(i + 2);
}
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/trie_map.h"
|
||||
#include "utils/byte_array_view.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
@ -169,7 +170,8 @@ class LanguageModelDictContent {
|
|||
|
||||
bool updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds, const int wordId,
|
||||
const bool isValid, const HistoricalInfo historicalInfo,
|
||||
const HeaderPolicy *const headerPolicy, int *const outAddedNewNgramEntryCount);
|
||||
const HeaderPolicy *const headerPolicy,
|
||||
MutableEntryCounters *const entryCountersToUpdate);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(LanguageModelDictContent);
|
||||
|
|
|
@ -211,7 +211,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
|||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
|
||||
&addedNewUnigram)) {
|
||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||
mUnigramCount++;
|
||||
mEntryCounters.incrementUnigramCount();
|
||||
}
|
||||
if (unigramProperty->getShortcuts().size() > 0) {
|
||||
// Add shortcut target.
|
||||
|
@ -259,7 +259,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
|
|||
return false;
|
||||
}
|
||||
if (!ptNodeParams.representsNonWordInfo()) {
|
||||
mUnigramCount--;
|
||||
mEntryCounters.decrementUnigramCount();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -316,7 +316,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
|
|||
bool addedNewEntry = false;
|
||||
if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) {
|
||||
if (addedNewEntry) {
|
||||
mBigramCount++;
|
||||
mEntryCounters.incrementNgramCount(prevWordIds.size() + 1);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
|
@ -354,7 +354,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon
|
|||
return false;
|
||||
}
|
||||
if (mNodeWriter.removeNgramEntry(prevWordIds, wordId)) {
|
||||
mBigramCount--;
|
||||
// Use the same n-gram order as the add path, which calls
// incrementNgramCount(prevWordIds.size() + 1): the order is the number of previous words plus
// the target word. Passing prevWordIds.size() alone would decrement the counter one order too
// low (e.g. the unigram counter when a bigram is removed).
mEntryCounters.decrementNgramCount(prevWordIds.size() + 1);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -401,12 +401,10 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
|||
// Refresh word ids.
|
||||
ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||
}
|
||||
int addedNewNgramEntryCount = 0;
|
||||
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllEntriesOnInputWord(prevWordIds,
|
||||
wordId, updateAsAValidWord, historicalInfo, mHeaderPolicy, &addedNewNgramEntryCount)) {
|
||||
wordId, updateAsAValidWord, historicalInfo, mHeaderPolicy, &mEntryCounters)) {
|
||||
return false;
|
||||
}
|
||||
mBigramCount += addedNewNgramEntryCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -415,7 +413,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
|||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||
return false;
|
||||
}
|
||||
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
|
||||
if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
|
||||
AKLOGE("Cannot flush the dictionary to file.");
|
||||
mIsCorrupted = true;
|
||||
return false;
|
||||
|
@ -453,8 +451,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
|||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy->isDecayingDict()) {
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
|
||||
mHeaderPolicy);
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -463,19 +460,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
|
|||
char *const outResult, const int maxResultLength) {
|
||||
const int compareLength = queryLength + 1 /* terminator */;
|
||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||
snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount());
|
||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||
snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount());
|
||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getUnigramCountHardLimit(
|
||||
ForgettingCurveUtils::getEntryCountHardLimit(
|
||||
mHeaderPolicy->getMaxUnigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getBigramCountHardLimit(
|
||||
ForgettingCurveUtils::getEntryCountHardLimit(
|
||||
mHeaderPolicy->getMaxBigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
#include "utils/int_array_view.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -37,7 +38,6 @@ namespace latinime {
|
|||
class DicNode;
|
||||
class DicNodeVector;
|
||||
|
||||
// TODO: Support counting ngram entries.
|
||||
// Word id = Artificial id that is stored in the PtNode looked up by the word.
|
||||
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
public:
|
||||
|
@ -51,8 +51,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
&mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy->getBigramCount()),
|
||||
mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(),
|
||||
mHeaderPolicy->getTrigramCount()),
|
||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
|
@ -141,9 +141,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
Ver4PatriciaTrieNodeWriter mNodeWriter;
|
||||
DynamicPtUpdatingHelper mUpdatingHelper;
|
||||
Ver4PatriciaTrieWritingHelper mWritingHelper;
|
||||
int mUnigramCount;
|
||||
// TODO: Support counting ngram entries.
|
||||
int mBigramCount;
|
||||
MutableEntryCounters mEntryCounters;
|
||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||
mutable bool mIsCorrupted;
|
||||
|
||||
|
|
|
@ -33,17 +33,18 @@
|
|||
namespace latinime {
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
||||
const int unigramCount, const int bigramCount) const {
|
||||
const EntryCounts &entryCounts) const {
|
||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
|
||||
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
|
||||
// Write the header, carrying the current entry counts and extended region size, into
// headerBuffer. On failure, log the values that were being written and report the failure to
// the caller.
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
        entryCounts, extendedRegionSize, &headerBuffer)) {
    // The counts are read from the entryCounts parameter; no other counter object is in
    // scope in this function.
    AKLOGE("Cannot write header structure to buffer. "
            "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, trigramCount: %d,"
            "extendedRegionSize: %d", false, entryCounts.getUnigramCount(),
            entryCounts.getBigramCount(), entryCounts.getTrigramCount(),
            extendedRegionSize);
    return false;
}
|
||||
|
@ -64,7 +65,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr
|
|||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
||||
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
|
||||
EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */),
|
||||
0 /* extendedRegionSize */, &headerBuffer)) {
|
||||
return false;
|
||||
}
|
||||
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -33,9 +34,7 @@ class Ver4PatriciaTrieWritingHelper {
|
|||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||
: mBuffers(buffers) {}
|
||||
|
||||
// TODO: Support counting ngram entries.
|
||||
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
|
||||
const int bigramCount) const;
|
||||
bool writeToDictFile(const char *const dictDirPath, const EntryCounts &entryCounts) const;
|
||||
|
||||
// This method cannot be const because the original dictionary buffer will be updated to detect
|
||||
// useless PtNodes during GC.
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "utils/time_keeper.h"
|
||||
|
@ -69,8 +70,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr>
|
|||
DictBuffersPtr dictBuffers = DictBuffers::createVer4DictBuffers(&headerPolicy,
|
||||
DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
|
||||
headerPolicy.fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
||||
0 /* unigramCount */, 0 /* bigramCount */,
|
||||
0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
|
||||
EntryCounts(), 0 /* extendedRegionSize */, dictBuffers->getWritableHeaderBuffer());
|
||||
if (!DynamicPtWritingUtils::writeEmptyDictionary(
|
||||
dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
|
||||
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_ENTRY_COUNTERS_H
|
||||
#define LATINIME_ENTRY_COUNTERS_H
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Copyable but immutable
|
||||
class EntryCounts final {
|
||||
public:
|
||||
EntryCounts() : mEntryCounts({{0, 0, 0}}) {}
|
||||
|
||||
EntryCounts(const int unigramCount, const int bigramCount, const int trigramCount)
|
||||
: mEntryCounts({{unigramCount, bigramCount, trigramCount}}) {}
|
||||
|
||||
explicit EntryCounts(const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> &counters)
|
||||
: mEntryCounts(counters) {}
|
||||
|
||||
int getUnigramCount() const {
|
||||
return mEntryCounts[0];
|
||||
}
|
||||
|
||||
int getBigramCount() const {
|
||||
return mEntryCounts[1];
|
||||
}
|
||||
|
||||
int getTrigramCount() const {
|
||||
return mEntryCounts[2];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(EntryCounts);
|
||||
|
||||
const std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> mEntryCounts;
|
||||
};
|
||||
|
||||
class MutableEntryCounters final {
|
||||
public:
|
||||
MutableEntryCounters() {
|
||||
mEntryCounters.fill(0);
|
||||
}
|
||||
|
||||
MutableEntryCounters(const int unigramCount, const int bigramCount, const int trigramCount)
|
||||
: mEntryCounters({{unigramCount, bigramCount, trigramCount}}) {}
|
||||
|
||||
const EntryCounts getEntryCounts() const {
|
||||
return EntryCounts(mEntryCounters);
|
||||
}
|
||||
|
||||
int getUnigramCount() const {
|
||||
return mEntryCounters[0];
|
||||
}
|
||||
|
||||
int getBigramCount() const {
|
||||
return mEntryCounters[1];
|
||||
}
|
||||
|
||||
int getTrigramCount() const {
|
||||
return mEntryCounters[2];
|
||||
}
|
||||
|
||||
void incrementUnigramCount() {
|
||||
++mEntryCounters[0];
|
||||
}
|
||||
|
||||
void decrementUnigramCount() {
|
||||
ASSERT(mEntryCounters[0] != 0);
|
||||
--mEntryCounters[0];
|
||||
}
|
||||
|
||||
void incrementBigramCount() {
|
||||
++mEntryCounters[1];
|
||||
}
|
||||
|
||||
void decrementBigramCount() {
|
||||
ASSERT(mEntryCounters[1] != 0);
|
||||
--mEntryCounters[1];
|
||||
}
|
||||
|
||||
void incrementNgramCount(const size_t n) {
|
||||
if (n < 1 || n > mEntryCounters.size()) {
|
||||
return;
|
||||
}
|
||||
++mEntryCounters[n - 1];
|
||||
}
|
||||
|
||||
void decrementNgramCount(const size_t n) {
|
||||
if (n < 1 || n > mEntryCounters.size()) {
|
||||
return;
|
||||
}
|
||||
ASSERT(mEntryCounters[n - 1] != 0);
|
||||
--mEntryCounters[n - 1];
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(MutableEntryCounters);
|
||||
|
||||
std::array<int, MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1> mEntryCounters;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_ENTRY_COUNTERS_H */
|
|
@ -38,8 +38,7 @@ const int ForgettingCurveUtils::OCCURRENCES_TO_RAISE_THE_LEVEL = 1;
|
|||
// 15 days
|
||||
const int ForgettingCurveUtils::DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS = 15 * 24 * 60 * 60;
|
||||
|
||||
const float ForgettingCurveUtils::UNIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
|
||||
const float ForgettingCurveUtils::BIGRAM_COUNT_HARD_LIMIT_WEIGHT = 1.2;
|
||||
const float ForgettingCurveUtils::ENTRY_COUNT_HARD_LIMIT_WEIGHT = 1.2;
|
||||
|
||||
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||
|
||||
|
@ -126,14 +125,22 @@ const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityT
|
|||
}
|
||||
|
||||
/* static */ bool ForgettingCurveUtils::needsToDecay(const bool mindsBlockByDecay,
|
||||
const int unigramCount, const int bigramCount, const HeaderPolicy *const headerPolicy) {
|
||||
if (unigramCount >= getUnigramCountHardLimit(headerPolicy->getMaxUnigramCount())) {
|
||||
const EntryCounts &entryCounts, const HeaderPolicy *const headerPolicy) {
|
||||
if (entryCounts.getUnigramCount()
|
||||
>= getEntryCountHardLimit(headerPolicy->getMaxUnigramCount())) {
|
||||
// Unigram count exceeds the limit.
|
||||
return true;
|
||||
} else if (bigramCount >= getBigramCountHardLimit(headerPolicy->getMaxBigramCount())) {
|
||||
}
|
||||
if (entryCounts.getBigramCount()
|
||||
>= getEntryCountHardLimit(headerPolicy->getMaxBigramCount())) {
|
||||
// Bigram count exceeds the limit.
|
||||
return true;
|
||||
}
|
||||
if (entryCounts.getTrigramCount()
|
||||
>= getEntryCountHardLimit(headerPolicy->getMaxTrigramCount())) {
|
||||
// Trigram count exceeds the limit.
|
||||
return true;
|
||||
}
|
||||
if (mindsBlockByDecay) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/entry_counters.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -42,22 +43,17 @@ class ForgettingCurveUtils {
|
|||
static bool needsToKeep(const HistoricalInfo *const historicalInfo,
|
||||
const HeaderPolicy *const headerPolicy);
|
||||
|
||||
static bool needsToDecay(const bool mindsBlockByDecay, const int unigramCount,
|
||||
const int bigramCount, const HeaderPolicy *const headerPolicy);
|
||||
static bool needsToDecay(const bool mindsBlockByDecay, const EntryCounts &entryCounters,
|
||||
const HeaderPolicy *const headerPolicy);
|
||||
|
||||
// TODO: Improve probability computation method and remove this.
|
||||
static int getProbabilityBiasForNgram(const int n) {
|
||||
return (n - 1) * MULTIPLIER_TWO_IN_PROBABILITY_SCALE;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE static int getUnigramCountHardLimit(const int maxUnigramCount) {
|
||||
return static_cast<int>(static_cast<float>(maxUnigramCount)
|
||||
* UNIGRAM_COUNT_HARD_LIMIT_WEIGHT);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE static int getBigramCountHardLimit(const int maxBigramCount) {
|
||||
return static_cast<int>(static_cast<float>(maxBigramCount)
|
||||
* BIGRAM_COUNT_HARD_LIMIT_WEIGHT);
|
||||
// Returns maxEntryCount scaled by ENTRY_COUNT_HARD_LIMIT_WEIGHT. The product is computed in
// float and then converted back to int, which drops the fractional part.
AK_FORCE_INLINE static int getEntryCountHardLimit(const int maxEntryCount) {
    const float maxEntryCountAsFloat = static_cast<float>(maxEntryCount);
    return static_cast<int>(maxEntryCountAsFloat * ENTRY_COUNT_HARD_LIMIT_WEIGHT);
}
|
||||
|
||||
private:
|
||||
|
@ -101,8 +97,7 @@ class ForgettingCurveUtils {
|
|||
static const int OCCURRENCES_TO_RAISE_THE_LEVEL;
|
||||
static const int DURATION_TO_LOWER_THE_LEVEL_IN_SECONDS;
|
||||
|
||||
static const float UNIGRAM_COUNT_HARD_LIMIT_WEIGHT;
|
||||
static const float BIGRAM_COUNT_HARD_LIMIT_WEIGHT;
|
||||
static const float ENTRY_COUNT_HARD_LIMIT_WEIGHT;
|
||||
|
||||
static const ProbabilityTable sProbabilityTable;
|
||||
|
||||
|
|
Loading…
Reference in a new issue