am 5ea2f297
: Merge "Start updating historical information."
* commit '5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe': Start updating historical information.
This commit is contained in:
commit
183bf8bb8d
5 changed files with 55 additions and 9 deletions
|
@ -55,7 +55,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
}
|
}
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
newTargetTerminalId);
|
newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
||||||
newProbability, timestamp);
|
newProbability, timestamp);
|
||||||
// Write an entry.
|
// Write an entry.
|
||||||
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
@ -81,7 +81,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
}
|
}
|
||||||
const BigramEntry updatedBigramEntry =
|
const BigramEntry updatedBigramEntry =
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&updatedBigramEntry, newProbability, timestamp);
|
&updatedBigramEntry, newProbability, timestamp);
|
||||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
||||||
}
|
}
|
||||||
|
@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
// Write new entry at a head position of the bigram list.
|
// Write new entry at a head position of the bigram list.
|
||||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
|
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&newBigramEntry, newProbability, timestamp);
|
&newBigramEntry, newProbability, timestamp);
|
||||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -218,14 +218,17 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
return invalidEntryPos;
|
return invalidEntryPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
|
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
const BigramEntry *const originalBigramEntry, const int newProbability,
|
const BigramEntry *const originalBigramEntry, const int newProbability,
|
||||||
const int timestamp) const {
|
const int timestamp) const {
|
||||||
if (mNeedsToDecayWhenUpdating) {
|
if (mNeedsToDecayWhenUpdating) {
|
||||||
// TODO: Update historical information.
|
|
||||||
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||||
originalBigramEntry->getProbability(), newProbability);
|
originalBigramEntry->getProbability(), newProbability);
|
||||||
return originalBigramEntry->updateProbabilityAndGetEntry(probability);
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
|
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||||
|
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||||
|
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
|
||||||
|
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,7 +59,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
||||||
|
|
||||||
const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
|
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
||||||
const int newProbability, const int timestamp) const;
|
const int newProbability, const int timestamp) const;
|
||||||
|
|
||||||
BigramDictContent *const mBigramDictContent;
|
BigramDictContent *const mBigramDictContent;
|
||||||
|
|
|
@ -292,10 +292,13 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
|
||||||
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
|
||||||
const int timestamp) const {
|
const int timestamp) const {
|
||||||
if (mNeedsToDecayWhenUpdating) {
|
if (mNeedsToDecayWhenUpdating) {
|
||||||
// TODO: Update historical information.
|
|
||||||
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||||
originalProbabilityEntry->getProbability(), newProbability);
|
originalProbabilityEntry->getProbability(), newProbability);
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability);
|
const HistoricalInfo updatedHistoricalInfo =
|
||||||
|
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||||
|
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
|
||||||
|
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
|
||||||
|
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
|
||||||
} else {
|
} else {
|
||||||
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
|
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,8 +39,39 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
|
||||||
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
|
||||||
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
|
||||||
|
|
||||||
|
// Upper bound for the level: createUpdatedHistoricalInfoFrom() does not raise the level once it
// is >= MAX_LEVEL.
const int ForgettingCurveUtils::MAX_LEVEL = 3;
|
||||||
|
// Upper bound for the count: when the incremented count would exceed MAX_COUNT,
// createUpdatedHistoricalInfoFrom() attempts a level-up instead of storing the new count.
const int ForgettingCurveUtils::MAX_COUNT = 3;
|
||||||
|
// Level assigned by createUpdatedHistoricalInfoFrom() when an entry whose level is 0 is updated
// with a probability other than NOT_A_PROBABILITY.
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
|
||||||
|
|
||||||
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
|
||||||
|
|
||||||
|
// Builds the HistoricalInfo to store for an entry that is being added or updated at `timestamp`.
//
// The result always carries `timestamp`; level and count are chosen as follows:
//  - newProbability is not NOT_A_PROBABILITY and the original level is 0:
//        level = MIN_VALID_LEVEL, count = 0.
//  - otherwise, the original timestamp is NOT_A_TIMESTAMP:
//        level = 0, count = 0 (initial information).
//  - otherwise the count is incremented by one. If the incremented count exceeds MAX_COUNT, the
//    level is raised by one and the count reset to 0, unless the level is already >= MAX_LEVEL,
//    in which case the original level and count are kept unchanged.
//
// originalHistoricalInfo is dereferenced without a null check.
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
|
||||||
|
const HistoricalInfo *const originalHistoricalInfo,
|
||||||
|
const int newProbability, const int timestamp) {
|
||||||
|
// This check comes first, so it also applies when the original timestamp is NOT_A_TIMESTAMP.
if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
|
||||||
|
return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
|
||||||
|
} else if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
|
||||||
|
// Initial information: the original entry has no timestamp yet.
|
||||||
|
return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */);
|
||||||
|
} else {
|
||||||
|
const int updatedCount = originalHistoricalInfo->getCount() + 1;
|
||||||
|
if (updatedCount > MAX_COUNT) {
|
||||||
|
// The incremented count exceeds MAX_COUNT, so the level is incremented if possible.
|
||||||
|
if (originalHistoricalInfo->getLevel() >= MAX_LEVEL) {
|
||||||
|
// The level is already at its maximum; keep level and count, refresh the timestamp only.
|
||||||
|
return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(),
|
||||||
|
originalHistoricalInfo->getCount());
|
||||||
|
} else {
|
||||||
|
// Level up and restart counting from 0.
|
||||||
|
return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel() + 1,
|
||||||
|
0 /* count */);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Still within MAX_COUNT: keep the level and store the incremented count.
return HistoricalInfo(timestamp, originalHistoricalInfo->getLevel(), updatedCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability) {
|
const int encodedBigramProbability) {
|
||||||
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -35,6 +36,10 @@ class ForgettingCurveUtils {
|
||||||
static const int MAX_BIGRAM_COUNT;
|
static const int MAX_BIGRAM_COUNT;
|
||||||
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
||||||
|
|
||||||
|
// Returns the HistoricalInfo to store when an entry is added or updated at the given timestamp.
// The level and count of the result are derived from originalHistoricalInfo and newProbability;
// the returned info always carries the given timestamp.
static const HistoricalInfo createUpdatedHistoricalInfoFrom(
|
||||||
|
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
|
||||||
|
const int timestamp);
|
||||||
|
|
||||||
static int getProbability(const int encodedUnigramProbability,
|
static int getProbability(const int encodedUnigramProbability,
|
||||||
const int encodedBigramProbability);
|
const int encodedBigramProbability);
|
||||||
|
|
||||||
|
@ -76,6 +81,10 @@ class ForgettingCurveUtils {
|
||||||
static const float MIN_PROBABILITY_TO_DECAY;
|
static const float MIN_PROBABILITY_TO_DECAY;
|
||||||
static const int DECAY_INTERVAL_SECONDS;
|
static const int DECAY_INTERVAL_SECONDS;
|
||||||
|
|
||||||
|
static const int MAX_LEVEL;
|
||||||
|
static const int MAX_COUNT;
|
||||||
|
static const int MIN_VALID_LEVEL;
|
||||||
|
|
||||||
static const ProbabilityTable sProbabilityTable;
|
static const ProbabilityTable sProbabilityTable;
|
||||||
|
|
||||||
static int decodeProbability(const int encodedProbability);
|
static int decodeProbability(const int encodedProbability);
|
||||||
|
|
Loading…
Reference in a new issue