Extend bigram probability field to support historical info.

Bug: 11073222
Change-Id: I020520251629c5a7c5b5fac21108392c8c2a38b6
main
Keisuke Kuroyanagi 2013-12-02 19:45:37 +09:00
parent 6e587b7f17
commit 69e6165d2e
9 changed files with 291 additions and 121 deletions

View File

@ -341,12 +341,21 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
#define INPUTLENGTH_FOR_DEBUG (-1) #define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ #define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName(const TypeName&); \ TypeName()
#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
TypeName(const TypeName&)
#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
void operator=(const TypeName&) void operator=(const TypeName&)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
DISALLOW_COPY_CONSTRUCTOR(TypeName); \
DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \ DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison // Used as a return value for character comparison

View File

@ -50,6 +50,8 @@ class BloomFilter {
} }
private: private:
DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
// Size, in bytes, of the bloom filter index for bigrams // Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of // where k is the number of hash functions, n the number of bigrams, and m the number of

View File

@ -26,12 +26,18 @@ namespace latinime {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const { bool *const outHasNext, int *const bigramEntryPos) const {
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext, mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
&targetTerminalId, bigramEntryPos);
if (outBigramPos) { if (outBigramPos) {
// Lookup target PtNode position. // Lookup target PtNode position.
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
*outProbability = bigramEntry.getProbability();
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
} }
} }
@ -47,12 +53,13 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
if (!mBigramDictContent->createNewBigramList(terminalId)) { if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false; return false;
} }
const int probabilityToWrite = getUpdatedProbability( const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
NOT_A_PROBABILITY /* originalProbability */, newProbability); newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
newProbability, timestamp);
// Write an entry. // Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */, if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
newTargetTerminalId, writingPos)) {
return false; return false;
} }
if (outAddedNewEntry) { if (outAddedNewEntry) {
@ -64,18 +71,19 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
if (entryPosToUpdate != NOT_A_DICT_POS) { if (entryPosToUpdate != NOT_A_DICT_POS) {
// Overwrite existing entry. // Overwrite existing entry.
bool hasNext = false; const BigramEntry originalBigramEntry =
int probability = NOT_A_PROBABILITY; mBigramDictContent->getBigramEntry(entryPosToUpdate);
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; if (!originalBigramEntry.isValid()) {
mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
entryPosToUpdate);
const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
// Reuse invalid entry. // Reuse invalid entry.
*outAddedNewEntry = true; if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
} }
return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext, const BigramEntry updatedBigramEntry =
newTargetTerminalId, entryPosToUpdate); originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
&updatedBigramEntry, newProbability, timestamp);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
} }
// Add new entry to the bigram list. // Add new entry to the bigram list.
@ -85,10 +93,10 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
} }
// Write new entry at a head position of the bigram list. // Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
const int probabilityToWrite = getUpdatedProbability( const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
NOT_A_PROBABILITY /* originalProbability */, newProbability); const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite, &newBigramEntry, newProbability, timestamp);
true /* hasNext */, newTargetTerminalId, &writingPos)) { if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
return false; return false;
} }
if (outAddedNewEntry) { if (outAddedNewEntry) {
@ -109,18 +117,14 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram entry doesn't exist. // Bigram entry doesn't exist.
return false; return false;
} }
bool hasNext = false; const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
int probability = NOT_A_PROBABILITY; if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
entryPosToUpdate);
if (targetTerminalId != originalTargetTerminalId) {
// Bigram entry doesn't exist. // Bigram entry doesn't exist.
return false; return false;
} }
// Remove bigram entry by overwriting target terminal Id. // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
return mBigramDictContent->writeBigramEntry(probability, hasNext, const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate); return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
} }
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
@ -134,34 +138,35 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
int readingPos = bigramListPos; int readingPos = bigramListPos;
while (hasNext) { while (hasNext) {
const int entryPos = readingPos; const int entryPos = readingPos;
int probability = NOT_A_PROBABILITY; const BigramEntry bigramEntry =
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, hasNext = bigramEntry.hasNext();
&targetTerminalId, &readingPos); if (!bigramEntry.isValid()) {
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
continue; continue;
} }
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
targetTerminalId); bigramEntry.getTargetTerminalId());
if (targetPtNodePos == NOT_A_DICT_POS) { if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry. // Invalidate bigram entry.
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
} else if (mNeedsToDecayWhenUpdating) { } else if (mNeedsToDecayWhenUpdating) {
probability = ForgettingCurveUtils::getEncodedProbabilityToSave( // TODO: Quit decaying probability during GC.
probability, mHeaderPolicy); const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
bigramEntry.getProbability(), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) { if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId, const BigramEntry updatedBigramEntry =
entryPos)) { bigramEntry.updateProbabilityAndGetEntry(probability);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
*outBigramCount += 1; *outBigramCount += 1;
} else { } else {
// Remove entry. // Remove entry.
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false; return false;
} }
} }
@ -182,10 +187,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
bool hasNext = true; bool hasNext = true;
int readingPos = bigramListPos; int readingPos = bigramListPos;
while (hasNext) { while (hasNext) {
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
&targetTerminalId, &readingPos); hasNext = bigramEntry.hasNext();
if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) { if (bigramEntry.isValid()) {
bigramCount++; bigramCount++;
} }
} }
@ -199,13 +204,13 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
int readingPos = bigramListPos; int readingPos = bigramListPos;
while (hasNext) { while (hasNext) {
const int entryPos = readingPos; const int entryPos = readingPos;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
&targetTerminalId, &readingPos); hasNext = bigramEntry.hasNext();
if (targetTerminalId == targetTerminalIdToFind) { if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found. // Entry with same target is found.
return entryPos; return entryPos;
} else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { } else if (!bigramEntry.isValid()) {
// Invalid entry that can be reused is found. // Invalid entry that can be reused is found.
invalidEntryPos = entryPos; invalidEntryPos = entryPos;
} }
@ -213,13 +218,16 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos; return invalidEntryPos;
} }
int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability, const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
const int newProbability) const { const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) { if (mNeedsToDecayWhenUpdating) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, // TODO: Update historical information.
newProbability); const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability);
return originalBigramEntry->updateProbabilityAndGetEntry(probability);
} else { } else {
return newProbability; return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
} }
} }

View File

@ -19,6 +19,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
namespace latinime { namespace latinime {
@ -58,7 +59,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
int getUpdatedProbability(const int originalProbability, const int newProbability) const; const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const;
BigramDictContent *const mBigramDictContent; BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable; const TerminalPositionLookupTable *const mTerminalPositionLookupTable;

View File

@ -20,53 +20,98 @@
namespace latinime { namespace latinime {
void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability, const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const { int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
if (outProbability) { const int hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
*outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; int probability = NOT_A_PROBABILITY;
int timestamp = Ver4DictConstants::NOT_A_TIME_STAMP;
int level = 0;
int count = 0;
if (mHasHistoricalInfo) {
probability = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else {
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
} }
if (outHasNext) { const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
}
const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
if (outTargetTerminalId) { const int targetTerminalId =
*outTargetTerminalId = (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
(targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId; if (mHasHistoricalInfo) {
return BigramEntry(hasNext, probability, timestamp, level, count, targetTerminalId);
} else {
return BigramEntry(hasNext, probability, targetTerminalId);
} }
} }
bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const int targetTerminalId, int *const entryWritingPos) { const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
const int bigramFlags = createAndGetBigramFlags(probability, hasNext); const int bigramFlags = createAndGetBigramFlags(
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false; return false;
} }
if (mHasHistoricalInfo) {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
bigramEntryToWrite->getTimeStamp());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
bigramEntryToWrite->getLevel());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
bigramEntryToWrite->getCount());
return false;
}
}
const int targetTerminalIdToWrite = const int targetTerminalIdToWrite =
(targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, bigramEntryToWrite->getTargetTerminalId();
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
return false;
}
return true;
} }
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
bool hasNext = true;
int readingPos = bigramListPos; int readingPos = bigramListPos;
int writingPos = toPos; int writingPos = toPos;
bool hasNext = true;
while (hasNext) { while (hasNext) {
int probability = NOT_A_PROBABILITY; const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; hasNext = bigramEntry.hasNext();
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
&readingPos);
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
&writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false; return false;
} }
@ -119,22 +164,22 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
int writingPos = toPos; int writingPos = toPos;
int lastEntryPos = NOT_A_DICT_POS; int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) { while (hasNext) {
int probability = NOT_A_PROBABILITY; const BigramEntry originalBigramEntry =
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, hasNext = originalBigramEntry.hasNext();
&targetTerminalId, &readingPos); if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
continue; continue;
} }
TerminalPositionLookupTable::TerminalIdMap::const_iterator it = TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
terminalIdMap->find(targetTerminalId); terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
if (it == terminalIdMap->end()) { if (it == terminalIdMap->end()) {
// Target word has been removed. // Target word has been removed.
continue; continue;
} }
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS; lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second, const BigramEntry updatedBigramEntry =
&writingPos)) { originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false; return false;
} }
@ -142,10 +187,9 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
} }
if (lastEntryPos != NOT_A_DICT_POS) { if (lastEntryPos != NOT_A_DICT_POS) {
// Update has next flag in the last written entry. // Update has next flag in the last written entry.
int probability = NOT_A_PROBABILITY; const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; false /* hasNext */);
getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos); if (!writeBigramEntry(&bigramEntry, writingPos)) {
if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos); AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
return false; return false;
} }

View File

@ -18,6 +18,7 @@
#define LATINIME_BIGRAM_DICT_CONTENT_H #define LATINIME_BIGRAM_DICT_CONTENT_H
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -26,27 +27,27 @@ namespace latinime {
class BigramDictContent : public SparseTableDictContent { class BigramDictContent : public SparseTableDictContent {
public: public:
BigramDictContent(const char *const dictDirPath, const bool isUpdatable) BigramDictContent(const char *const dictDirPath, const bool hasHistoricalInfo,
const bool isUpdatable)
: SparseTableDictContent(dictDirPath, : SparseTableDictContent(dictDirPath,
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
BigramDictContent() BigramDictContent(const bool hasHistoricalInfo)
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
void getBigramEntry(int *const outProbability, bool *const outHasNext, const BigramEntry getBigramEntry(const int bigramEntryPos) const {
int *const outTargetTerminalId, const int bigramEntryPos) const {
int readingPos = bigramEntryPos; int readingPos = bigramEntryPos;
getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId, return getBigramEntryAndAdvancePosition(&readingPos);
&readingPos);
} }
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
int *const outTargetTerminalId, int *const bigramEntryPos) const;
// Returns head position of bigram list for a PtNode specified by terminalId. // Returns head position of bigram list for a PtNode specified by terminalId.
int getBigramListHeadPos(const int terminalId) const { int getBigramListHeadPos(const int terminalId) const {
@ -57,15 +58,13 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId); return addressLookupTable->get(terminalId);
} }
bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId, bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
const int entryWritingPos) {
int writingPos = entryWritingPos; int writingPos = entryWritingPos;
return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
&writingPos);
} }
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
const int targetTerminalId, int *const entryWritingPos); int *const entryWritingPos);
bool createNewBigramList(const int terminalId) { bool createNewBigramList(const int terminalId) {
const int bigramListPos = getContentBuffer()->getTailPosition(); const int bigramListPos = getContentBuffer()->getTailPosition();
@ -96,6 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const sourceBigramDictContent, const int toPos, const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
int *const outEntryCount); int *const outEntryCount);
bool mHasHistoricalInfo;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */

View File

@ -0,0 +1,104 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BIGRAM_ENTRY_H
#define LATINIME_BIGRAM_ENTRY_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
namespace latinime {
class BigramEntry {
public:
BigramEntry(const BigramEntry& bigramEntry)
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
mTimestamp(bigramEntry.mTimestamp), mLevel(bigramEntry.mLevel),
mCount(bigramEntry.mCount), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability),
mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0),
mTargetTerminalId(targetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability, const int timestamp, const int level,
const int count, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability), mTimestamp(timestamp),
mLevel(level), mCount(count), mTargetTerminalId(targetTerminalId) {}
const BigramEntry getInvalidatedEntry() const {
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
}
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
return BigramEntry(hasNext, mProbability, mTimestamp, mLevel, mCount,
mTargetTerminalId);
}
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
return BigramEntry(mHasNext, mProbability, mTimestamp, mLevel, mCount,
newTargetTerminalId);
}
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
return BigramEntry(mHasNext, probability, mTimestamp, mLevel, mCount,
mTargetTerminalId);
}
bool isValid() const {
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
}
bool hasNext() const {
return mHasNext;
}
int getProbability() const {
return mProbability;
}
int getTimeStamp() const {
return mTimestamp;
}
int getLevel() const {
return mLevel;
}
int getCount() const {
return mCount;
}
int getTargetTerminalId() const {
return mTargetTerminalId;
}
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
const bool mHasNext;
const int mProbability;
const int mTimestamp;
const int mLevel;
const int mCount;
const int mTargetTerminalId;
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_ENTRY_H */

View File

@ -127,7 +127,7 @@ class Ver4DictBuffers {
// TODO: Quit using header size. // TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize), mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable), mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {} mIsUpdatable(isUpdatable) {}
@ -137,7 +137,8 @@ class Ver4DictBuffers {
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(), mTerminalPositionLookupTable(),
mProbabilityDictContent(false /* hasHistoricalInfo */), mProbabilityDictContent(false /* hasHistoricalInfo */),
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {} mBigramDictContent(false /* hasHistoricalInfo */), mShortcutDictContent(),
mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer; const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const int mHeaderSize; const int mHeaderSize;

View File

@ -56,8 +56,7 @@ class ExclusiveOwnershipPointer {
private: private:
// This class allows to copy and assign and ensures only one instance has the ownership of the // This class allows to copy and assign and ensures only one instance has the ownership of the
// managed pointer. // managed pointer.
DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) { void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
if (*mSharedOwnerPtr != src) { if (*mSharedOwnerPtr != src) {