Merge "Extend bigram probability field to support historical info."
commit
027de36706
|
@ -341,12 +341,21 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
|
|||
#define INPUTLENGTH_FOR_DEBUG (-1)
|
||||
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&); \
|
||||
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
|
||||
TypeName()
|
||||
|
||||
#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
|
||||
TypeName(const TypeName&)
|
||||
|
||||
#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
|
||||
void operator=(const TypeName&)
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
DISALLOW_COPY_CONSTRUCTOR(TypeName); \
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
|
||||
|
||||
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
|
||||
TypeName(); \
|
||||
DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
|
||||
DISALLOW_COPY_AND_ASSIGN(TypeName)
|
||||
|
||||
// Used as a return value for character comparison
|
||||
|
|
|
@ -50,6 +50,8 @@ class BloomFilter {
|
|||
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
|
||||
|
||||
// Size, in bytes, of the bloom filter index for bigrams
|
||||
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
|
||||
// where k is the number of hash functions, n the number of bigrams, and m the number of
|
||||
|
|
|
@ -26,12 +26,18 @@ namespace latinime {
|
|||
|
||||
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
|
||||
&targetTerminalId, bigramEntryPos);
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
|
||||
if (outBigramPos) {
|
||||
// Lookup target PtNode position.
|
||||
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
|
||||
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
||||
bigramEntry.getTargetTerminalId());
|
||||
}
|
||||
if (outProbability) {
|
||||
*outProbability = bigramEntry.getProbability();
|
||||
}
|
||||
if (outHasNext) {
|
||||
*outHasNext = bigramEntry.hasNext();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,12 +53,13 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
|||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||
return false;
|
||||
}
|
||||
const int probabilityToWrite = getUpdatedProbability(
|
||||
NOT_A_PROBABILITY /* originalProbability */, newProbability);
|
||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||
newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
|
||||
newProbability, timestamp);
|
||||
// Write an entry.
|
||||
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
|
||||
newTargetTerminalId, writingPos)) {
|
||||
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (outAddedNewEntry) {
|
||||
|
@ -64,18 +71,19 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
|||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
|
||||
if (entryPosToUpdate != NOT_A_DICT_POS) {
|
||||
// Overwrite existing entry.
|
||||
bool hasNext = false;
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
|
||||
entryPosToUpdate);
|
||||
const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
|
||||
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
|
||||
const BigramEntry originalBigramEntry =
|
||||
mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
||||
if (!originalBigramEntry.isValid()) {
|
||||
// Reuse invalid entry.
|
||||
*outAddedNewEntry = true;
|
||||
if (outAddedNewEntry) {
|
||||
*outAddedNewEntry = true;
|
||||
}
|
||||
}
|
||||
return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
|
||||
newTargetTerminalId, entryPosToUpdate);
|
||||
const BigramEntry updatedBigramEntry =
|
||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
|
||||
&updatedBigramEntry, newProbability, timestamp);
|
||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
||||
}
|
||||
|
||||
// Add new entry to the bigram list.
|
||||
|
@ -85,10 +93,10 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
|||
}
|
||||
// Write new entry at a head position of the bigram list.
|
||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
const int probabilityToWrite = getUpdatedProbability(
|
||||
NOT_A_PROBABILITY /* originalProbability */, newProbability);
|
||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
|
||||
true /* hasNext */, newTargetTerminalId, &writingPos)) {
|
||||
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
|
||||
&newBigramEntry, newProbability, timestamp);
|
||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (outAddedNewEntry) {
|
||||
|
@ -109,18 +117,14 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
|
|||
// Bigram entry doesn't exist.
|
||||
return false;
|
||||
}
|
||||
bool hasNext = false;
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
|
||||
entryPosToUpdate);
|
||||
if (targetTerminalId != originalTargetTerminalId) {
|
||||
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
||||
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
|
||||
// Bigram entry doesn't exist.
|
||||
return false;
|
||||
}
|
||||
// Remove bigram entry by overwriting target terminal Id.
|
||||
return mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
|
||||
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
|
||||
}
|
||||
|
||||
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
||||
|
@ -134,34 +138,35 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
|
|||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
||||
&targetTerminalId, &readingPos);
|
||||
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (!bigramEntry.isValid()) {
|
||||
continue;
|
||||
}
|
||||
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
||||
targetTerminalId);
|
||||
bigramEntry.getTargetTerminalId());
|
||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||
// Invalidate bigram entry.
|
||||
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
} else if (mNeedsToDecayWhenUpdating) {
|
||||
probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||
probability, mHeaderPolicy);
|
||||
// TODO: Quit decaying probability during GC.
|
||||
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
|
||||
bigramEntry.getProbability(), mHeaderPolicy);
|
||||
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
|
||||
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
|
||||
entryPos)) {
|
||||
const BigramEntry updatedBigramEntry =
|
||||
bigramEntry.updateProbabilityAndGetEntry(probability);
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
*outBigramCount += 1;
|
||||
} else {
|
||||
// Remove entry.
|
||||
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
|
||||
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -182,10 +187,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
|
|||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
|
||||
&targetTerminalId, &readingPos);
|
||||
if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (bigramEntry.isValid()) {
|
||||
bigramCount++;
|
||||
}
|
||||
}
|
||||
|
@ -199,13 +204,13 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
|||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
|
||||
&targetTerminalId, &readingPos);
|
||||
if (targetTerminalId == targetTerminalIdToFind) {
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
||||
// Entry with same target is found.
|
||||
return entryPos;
|
||||
} else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
} else if (!bigramEntry.isValid()) {
|
||||
// Invalid entry that can be reused is found.
|
||||
invalidEntryPos = entryPos;
|
||||
}
|
||||
|
@ -213,13 +218,16 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
|||
return invalidEntryPos;
|
||||
}
|
||||
|
||||
int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
|
||||
const int newProbability) const {
|
||||
const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
|
||||
const BigramEntry *const originalBigramEntry, const int newProbability,
|
||||
const int timestamp) const {
|
||||
if (mNeedsToDecayWhenUpdating) {
|
||||
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
|
||||
newProbability);
|
||||
// TODO: Update historical information.
|
||||
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
|
||||
originalBigramEntry->getProbability(), newProbability);
|
||||
return originalBigramEntry->updateProbabilityAndGetEntry(probability);
|
||||
} else {
|
||||
return newProbability;
|
||||
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -58,7 +59,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|||
|
||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
||||
|
||||
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
|
||||
const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
|
||||
const int newProbability, const int timestamp) const;
|
||||
|
||||
BigramDictContent *const mBigramDictContent;
|
||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||
|
|
|
@ -20,53 +20,98 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
|
||||
bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
|
||||
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||
int *const bigramEntryPos) const {
|
||||
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
||||
if (outProbability) {
|
||||
*outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
|
||||
const int hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int timestamp = Ver4DictConstants::NOT_A_TIME_STAMP;
|
||||
int level = 0;
|
||||
int count = 0;
|
||||
if (mHasHistoricalInfo) {
|
||||
probability = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
|
||||
timestamp = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
|
||||
level = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
|
||||
count = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
|
||||
} else {
|
||||
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
|
||||
}
|
||||
if (outHasNext) {
|
||||
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
||||
}
|
||||
const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
|
||||
if (outTargetTerminalId) {
|
||||
*outTargetTerminalId =
|
||||
(targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
|
||||
Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
|
||||
const int targetTerminalId =
|
||||
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
|
||||
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
|
||||
if (mHasHistoricalInfo) {
|
||||
return BigramEntry(hasNext, probability, timestamp, level, count, targetTerminalId);
|
||||
} else {
|
||||
return BigramEntry(hasNext, probability, targetTerminalId);
|
||||
}
|
||||
}
|
||||
|
||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
||||
const int targetTerminalId, int *const entryWritingPos) {
|
||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
||||
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
||||
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
|
||||
const int bigramFlags = createAndGetBigramFlags(
|
||||
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
|
||||
bigramEntryToWrite->hasNext());
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
||||
return false;
|
||||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
|
||||
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
||||
bigramEntryToWrite->getProbability());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getTimeStamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
||||
bigramEntryToWrite->getTimeStamp());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getLevel(),
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
|
||||
bigramEntryToWrite->getLevel());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getCount(),
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
|
||||
bigramEntryToWrite->getCount());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const int targetTerminalIdToWrite =
|
||||
(targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
|
||||
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
|
||||
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
|
||||
bigramEntryToWrite->getTargetTerminalId();
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
|
||||
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
|
||||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
int writingPos = toPos;
|
||||
bool hasNext = true;
|
||||
while (hasNext) {
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
|
||||
&readingPos);
|
||||
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
||||
&writingPos)) {
|
||||
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
|
||||
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
|
@ -119,22 +164,22 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
|||
int writingPos = toPos;
|
||||
int lastEntryPos = NOT_A_DICT_POS;
|
||||
while (hasNext) {
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
|
||||
&targetTerminalId, &readingPos);
|
||||
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
const BigramEntry originalBigramEntry =
|
||||
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = originalBigramEntry.hasNext();
|
||||
if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
continue;
|
||||
}
|
||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
||||
terminalIdMap->find(targetTerminalId);
|
||||
terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
|
||||
if (it == terminalIdMap->end()) {
|
||||
// Target word has been removed.
|
||||
continue;
|
||||
}
|
||||
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
|
||||
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
|
||||
&writingPos)) {
|
||||
const BigramEntry updatedBigramEntry =
|
||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
|
||||
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
|
||||
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
|
@ -142,10 +187,9 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
|||
}
|
||||
if (lastEntryPos != NOT_A_DICT_POS) {
|
||||
// Update has next flag in the last written entry.
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
|
||||
if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
|
||||
const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
|
||||
false /* hasNext */);
|
||||
if (!writeBigramEntry(&bigramEntry, writingPos)) {
|
||||
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
|
@ -26,27 +27,27 @@ namespace latinime {
|
|||
|
||||
class BigramDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
BigramDictContent(const char *const dictDirPath, const bool isUpdatable)
|
||||
BigramDictContent(const char *const dictDirPath, const bool hasHistoricalInfo,
|
||||
const bool isUpdatable)
|
||||
: SparseTableDictContent(dictDirPath,
|
||||
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||
|
||||
BigramDictContent()
|
||||
BigramDictContent(const bool hasHistoricalInfo)
|
||||
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||
|
||||
void getBigramEntry(int *const outProbability, bool *const outHasNext,
|
||||
int *const outTargetTerminalId, const int bigramEntryPos) const {
|
||||
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
|
||||
int readingPos = bigramEntryPos;
|
||||
getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId,
|
||||
&readingPos);
|
||||
return getBigramEntryAndAdvancePosition(&readingPos);
|
||||
}
|
||||
|
||||
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
|
||||
int *const outTargetTerminalId, int *const bigramEntryPos) const;
|
||||
const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
|
||||
|
||||
// Returns head position of bigram list for a PtNode specified by terminalId.
|
||||
int getBigramListHeadPos(const int terminalId) const {
|
||||
|
@ -57,15 +58,13 @@ class BigramDictContent : public SparseTableDictContent {
|
|||
return addressLookupTable->get(terminalId);
|
||||
}
|
||||
|
||||
bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId,
|
||||
const int entryWritingPos) {
|
||||
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
|
||||
int writingPos = entryWritingPos;
|
||||
return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
||||
&writingPos);
|
||||
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
|
||||
}
|
||||
|
||||
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
|
||||
const int targetTerminalId, int *const entryWritingPos);
|
||||
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
|
||||
int *const entryWritingPos);
|
||||
|
||||
bool createNewBigramList(const int terminalId) {
|
||||
const int bigramListPos = getContentBuffer()->getTailPosition();
|
||||
|
@ -96,6 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
|
|||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
int *const outEntryCount);
|
||||
|
||||
bool mHasHistoricalInfo;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
|
||||
|
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BIGRAM_ENTRY_H
|
||||
#define LATINIME_BIGRAM_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BigramEntry {
|
||||
public:
|
||||
BigramEntry(const BigramEntry& bigramEntry)
|
||||
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
|
||||
mTimestamp(bigramEntry.mTimestamp), mLevel(bigramEntry.mLevel),
|
||||
mCount(bigramEntry.mCount), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
|
||||
|
||||
// Entry with historical information.
|
||||
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
|
||||
: mHasNext(hasNext), mProbability(probability),
|
||||
mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0),
|
||||
mTargetTerminalId(targetTerminalId) {}
|
||||
|
||||
// Entry with historical information.
|
||||
BigramEntry(const bool hasNext, const int probability, const int timestamp, const int level,
|
||||
const int count, const int targetTerminalId)
|
||||
: mHasNext(hasNext), mProbability(probability), mTimestamp(timestamp),
|
||||
mLevel(level), mCount(count), mTargetTerminalId(targetTerminalId) {}
|
||||
|
||||
const BigramEntry getInvalidatedEntry() const {
|
||||
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
|
||||
}
|
||||
|
||||
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
|
||||
return BigramEntry(hasNext, mProbability, mTimestamp, mLevel, mCount,
|
||||
mTargetTerminalId);
|
||||
}
|
||||
|
||||
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
|
||||
return BigramEntry(mHasNext, mProbability, mTimestamp, mLevel, mCount,
|
||||
newTargetTerminalId);
|
||||
}
|
||||
|
||||
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
|
||||
return BigramEntry(mHasNext, probability, mTimestamp, mLevel, mCount,
|
||||
mTargetTerminalId);
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
}
|
||||
|
||||
bool hasNext() const {
|
||||
return mHasNext;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
int getTimeStamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
int getLevel() const {
|
||||
return mLevel;
|
||||
}
|
||||
|
||||
int getCount() const {
|
||||
return mCount;
|
||||
}
|
||||
|
||||
int getTargetTerminalId() const {
|
||||
return mTargetTerminalId;
|
||||
}
|
||||
|
||||
private:
|
||||
// Copy constructor is public to use this class as a type of return value.
|
||||
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
|
||||
|
||||
const bool mHasNext;
|
||||
const int mProbability;
|
||||
const int mTimestamp;
|
||||
const int mLevel;
|
||||
const int mCount;
|
||||
const int mTargetTerminalId;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BIGRAM_ENTRY_H */
|
|
@ -127,7 +127,7 @@ class Ver4DictBuffers {
|
|||
// TODO: Quit using header size.
|
||||
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
|
||||
mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
|
||||
mBigramDictContent(dictDirPath, isUpdatable),
|
||||
mBigramDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
|
||||
mShortcutDictContent(dictDirPath, isUpdatable),
|
||||
mIsUpdatable(isUpdatable) {}
|
||||
|
||||
|
@ -137,7 +137,8 @@ class Ver4DictBuffers {
|
|||
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mTerminalPositionLookupTable(),
|
||||
mProbabilityDictContent(false /* hasHistoricalInfo */),
|
||||
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
|
||||
mBigramDictContent(false /* hasHistoricalInfo */), mShortcutDictContent(),
|
||||
mIsUpdatable(true) {}
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||
const int mHeaderSize;
|
||||
|
|
|
@ -56,8 +56,7 @@ class ExclusiveOwnershipPointer {
|
|||
private:
|
||||
// This class allows to copy and assign and ensures only one instance has the ownership of the
|
||||
// managed pointer.
|
||||
|
||||
ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
|
||||
DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
|
||||
|
||||
void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
|
||||
if (*mSharedOwnerPtr != src) {
|
||||
|
|
Loading…
Reference in New Issue