am 5ea2f297: Merge "Start updating historical information."

* commit '5ea2f2972ee3d04ecbf6a6cdad6d056a2d85bdfe':
  Start updating historical information.
This commit is contained in:
Keisuke Kuroyanagi 2013-12-08 23:24:26 -08:00 committed by Android Git Automerger
commit 183bf8bb8d
5 changed files with 55 additions and 9 deletions

View file

@ -55,7 +55,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
} }
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId); newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry, const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
newProbability, timestamp); newProbability, timestamp);
// Write an entry. // Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
@ -81,7 +81,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
} }
const BigramEntry updatedBigramEntry = const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, newProbability, timestamp); &updatedBigramEntry, newProbability, timestamp);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
} }
@ -94,7 +94,7 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
// Write new entry at a head position of the bigram list. // Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, newProbability, timestamp); &newBigramEntry, newProbability, timestamp);
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) { if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
return false; return false;
@ -218,14 +218,17 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos; return invalidEntryPos;
} }
const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry( const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry, const int newProbability, const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const { const int timestamp) const {
if (mNeedsToDecayWhenUpdating) { if (mNeedsToDecayWhenUpdating) {
// TODO: Update historical information.
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability( const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability); originalBigramEntry->getProbability(), newProbability);
return originalBigramEntry->updateProbabilityAndGetEntry(probability); const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
originalBigramEntry->getHistoricalInfo(), newProbability, timestamp);
return originalBigramEntry->updateProbabilityAndGetEntry(probability)
.updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else { } else {
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
} }

View file

@ -59,7 +59,7 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry, const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const; const int newProbability, const int timestamp) const;
BigramDictContent *const mBigramDictContent; BigramDictContent *const mBigramDictContent;

View file

@ -292,10 +292,13 @@ const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
const int timestamp) const { const int timestamp) const {
if (mNeedsToDecayWhenUpdating) { if (mNeedsToDecayWhenUpdating) {
// TODO: Update historical information.
const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability( const int updatedProbability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalProbabilityEntry->getProbability(), newProbability); originalProbabilityEntry->getProbability(), newProbability);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability); const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp);
return originalProbabilityEntry->createEntryWithUpdatedProbability(updatedProbability)
.createEntryWithUpdatedHistoricalInfo(&updatedHistoricalInfo);
} else { } else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
} }

View file

@ -39,8 +39,39 @@ const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;
const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60; const int ForgettingCurveUtils::DECAY_INTERVAL_SECONDS = 2 * 60 * 60;
const int ForgettingCurveUtils::MAX_LEVEL = 3;
const int ForgettingCurveUtils::MAX_COUNT = 3;
const int ForgettingCurveUtils::MIN_VALID_LEVEL = 1;
const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable; const ForgettingCurveUtils::ProbabilityTable ForgettingCurveUtils::sProbabilityTable;
// Builds the HistoricalInfo to store for an entry that is being updated at |timestamp|.
// The returned info always carries |timestamp|; level and count are derived as follows:
//  - A level-0 entry updated with a valid probability becomes MIN_VALID_LEVEL with count 0.
//  - Otherwise, an entry whose timestamp is NOT_A_TIMESTAMP starts at level 0 with count 0.
//  - Otherwise the count is incremented by one. When the incremented count would exceed
//    MAX_COUNT, the level is raised by one and the count reset to 0, unless the level has
//    already reached MAX_LEVEL, in which case level and count are kept as they are.
/* static */ const HistoricalInfo ForgettingCurveUtils::createUpdatedHistoricalInfoFrom(
        const HistoricalInfo *const originalHistoricalInfo,
        const int newProbability, const int timestamp) {
    if (newProbability != NOT_A_PROBABILITY && originalHistoricalInfo->getLevel() == 0) {
        // A valid probability was supplied for a level-0 entry: make the entry valid.
        return HistoricalInfo(timestamp, MIN_VALID_LEVEL /* level */, 0 /* count */);
    }
    if (originalHistoricalInfo->getTimeStamp() == NOT_A_TIMESTAMP) {
        // No timestamp has been recorded yet: create the initial information.
        return HistoricalInfo(timestamp, 0 /* level */, 0 /* count */);
    }

    const int currentLevel = originalHistoricalInfo->getLevel();
    const int currentCount = originalHistoricalInfo->getCount();
    const int incrementedCount = currentCount + 1;
    if (incrementedCount <= MAX_COUNT) {
        // Still room at this level: only the count advances.
        return HistoricalInfo(timestamp, currentLevel, incrementedCount);
    }
    // The count would exceed its maximum, so the level is a candidate for promotion.
    if (currentLevel < MAX_LEVEL) {
        // Level up and restart counting.
        return HistoricalInfo(timestamp, currentLevel + 1, 0 /* count */);
    }
    // The level is already at its maximum: keep level and count, refresh the timestamp only.
    return HistoricalInfo(timestamp, currentLevel, currentCount);
}
/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability) { const int encodedBigramProbability) {
if (encodedUnigramProbability == NOT_A_PROBABILITY) { if (encodedUnigramProbability == NOT_A_PROBABILITY) {

View file

@ -20,6 +20,7 @@
#include <vector> #include <vector>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime { namespace latinime {
@ -35,6 +36,10 @@ class ForgettingCurveUtils {
static const int MAX_BIGRAM_COUNT; static const int MAX_BIGRAM_COUNT;
static const int MAX_BIGRAM_COUNT_AFTER_GC; static const int MAX_BIGRAM_COUNT_AFTER_GC;
static const HistoricalInfo createUpdatedHistoricalInfoFrom(
const HistoricalInfo *const originalHistoricalInfo, const int newProbability,
const int timestamp);
static int getProbability(const int encodedUnigramProbability, static int getProbability(const int encodedUnigramProbability,
const int encodedBigramProbability); const int encodedBigramProbability);
@ -76,6 +81,10 @@ class ForgettingCurveUtils {
static const float MIN_PROBABILITY_TO_DECAY; static const float MIN_PROBABILITY_TO_DECAY;
static const int DECAY_INTERVAL_SECONDS; static const int DECAY_INTERVAL_SECONDS;
static const int MAX_LEVEL;
static const int MAX_COUNT;
static const int MIN_VALID_LEVEL;
static const ProbabilityTable sProbabilityTable; static const ProbabilityTable sProbabilityTable;
static int decodeProbability(const int encodedProbability); static int decodeProbability(const int encodedProbability);