Merge "Extend bigram probability field to support historical info."

main
Keisuke Kuroyanagi 2013-12-02 10:53:18 +00:00 committed by Android (Google) Code Review
commit 027de36706
9 changed files with 291 additions and 121 deletions

View File

@ -341,12 +341,21 @@ template<typename T> AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu
#define INPUTLENGTH_FOR_DEBUG (-1)
#define MIN_OUTPUT_INDEX_FOR_DEBUG (-1)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
TypeName()
#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \
TypeName(const TypeName&)
#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \
void operator=(const TypeName&)
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
DISALLOW_COPY_CONSTRUCTOR(TypeName); \
DISALLOW_ASSIGNMENT_OPERATOR(TypeName)
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \
DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// Used as a return value for character comparison

View File

@ -50,6 +50,8 @@ class BloomFilter {
}
private:
DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter);
// Size, in bytes, of the bloom filter index for bigrams
// 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k,
// where k is the number of hash functions, n the number of bigrams, and m the number of

View File

@ -26,12 +26,18 @@ namespace latinime {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext,
&targetTerminalId, bigramEntryPos);
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
if (outBigramPos) {
// Lookup target PtNode position.
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId);
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
*outProbability = bigramEntry.getProbability();
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
}
}
@ -47,12 +53,13 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const int probabilityToWrite = getUpdatedProbability(
NOT_A_PROBABILITY /* originalProbability */, newProbability);
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry,
newProbability, timestamp);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */,
newTargetTerminalId, writingPos)) {
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
return false;
}
if (outAddedNewEntry) {
@ -64,18 +71,19 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
if (entryPosToUpdate != NOT_A_DICT_POS) {
// Overwrite existing entry.
bool hasNext = false;
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId,
entryPosToUpdate);
const int probabilityToWrite = getUpdatedProbability(probability, newProbability);
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) {
const BigramEntry originalBigramEntry =
mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (!originalBigramEntry.isValid()) {
// Reuse invalid entry.
*outAddedNewEntry = true;
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
}
return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext,
newTargetTerminalId, entryPosToUpdate);
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
&updatedBigramEntry, newProbability, timestamp);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
// Add new entry to the bigram list.
@ -85,10 +93,10 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
}
// Write new entry at a head position of the bigram list.
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
const int probabilityToWrite = getUpdatedProbability(
NOT_A_PROBABILITY /* originalProbability */, newProbability);
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite,
true /* hasNext */, newTargetTerminalId, &writingPos)) {
const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(
&newBigramEntry, newProbability, timestamp);
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) {
return false;
}
if (outAddedNewEntry) {
@ -109,18 +117,14 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram entry doesn't exist.
return false;
}
bool hasNext = false;
int probability = NOT_A_PROBABILITY;
int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId,
entryPosToUpdate);
if (targetTerminalId != originalTargetTerminalId) {
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
// Bigram entry doesn't exist.
return false;
}
// Remove bigram entry by overwriting target terminal Id.
return mBigramDictContent->writeBigramEntry(probability, hasNext,
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate);
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
}
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
@ -134,34 +138,35 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
&targetTerminalId, &readingPos);
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!bigramEntry.isValid()) {
continue;
}
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
targetTerminalId);
bigramEntry.getTargetTerminalId());
if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry.
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
} else if (mNeedsToDecayWhenUpdating) {
probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
probability, mHeaderPolicy);
// TODO: Quit decaying probability during GC.
const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave(
bigramEntry.getProbability(), mHeaderPolicy);
if (ForgettingCurveUtils::isValidEncodedProbability(probability)) {
if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId,
entryPos)) {
const BigramEntry updatedBigramEntry =
bigramEntry.updateProbabilityAndGetEntry(probability);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
*outBigramCount += 1;
} else {
// Remove entry.
if (!mBigramDictContent->writeBigramEntry(probability, hasNext,
Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) {
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
}
@ -182,10 +187,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
&targetTerminalId, &readingPos);
if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.isValid()) {
bigramCount++;
}
}
@ -199,13 +204,13 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext,
&targetTerminalId, &readingPos);
if (targetTerminalId == targetTerminalIdToFind) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found.
return entryPos;
} else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
} else if (!bigramEntry.isValid()) {
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
@ -213,13 +218,16 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
return invalidEntryPos;
}
int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability,
const int newProbability) const {
const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry(
const BigramEntry *const originalBigramEntry, const int newProbability,
const int timestamp) const {
if (mNeedsToDecayWhenUpdating) {
return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
newProbability);
// TODO: Update historical information.
const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability(
originalBigramEntry->getProbability(), newProbability);
return originalBigramEntry->updateProbabilityAndGetEntry(probability);
} else {
return newProbability;
return originalBigramEntry->updateProbabilityAndGetEntry(newProbability);
}
}

View File

@ -19,6 +19,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
namespace latinime {
@ -58,7 +59,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
int getUpdatedProbability(const int originalProbability, const int newProbability) const;
const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const;
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;

View File

@ -20,53 +20,98 @@
namespace latinime {
void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability,
bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const {
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
if (outProbability) {
*outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
const int hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
int probability = NOT_A_PROBABILITY;
int timestamp = Ver4DictConstants::NOT_A_TIME_STAMP;
int level = 0;
int count = 0;
if (mHasHistoricalInfo) {
probability = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else {
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
}
if (outHasNext) {
*outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
}
const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
if (outTargetTerminalId) {
*outTargetTerminalId =
(targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId;
const int targetTerminalId =
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
if (mHasHistoricalInfo) {
return BigramEntry(hasNext, probability, timestamp, level, count, targetTerminalId);
} else {
return BigramEntry(hasNext, probability, targetTerminalId);
}
}
bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
const int targetTerminalId, int *const entryWritingPos) {
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
const int bigramFlags = createAndGetBigramFlags(
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
bigramEntryToWrite->getTimeStamp());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
bigramEntryToWrite->getLevel());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
bigramEntryToWrite->getCount());
return false;
}
}
const int targetTerminalIdToWrite =
(targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos);
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
bigramEntryToWrite->getTargetTerminalId();
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
return false;
}
return true;
}
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) {
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
bool hasNext = true;
while (hasNext) {
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId,
&readingPos);
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
&writingPos)) {
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
}
@ -119,22 +164,22 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
int writingPos = toPos;
int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) {
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext,
&targetTerminalId, &readingPos);
if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
const BigramEntry originalBigramEntry =
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = originalBigramEntry.hasNext();
if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
continue;
}
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
terminalIdMap->find(targetTerminalId);
terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
if (it == terminalIdMap->end()) {
// Target word has been removed.
continue;
}
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
&writingPos)) {
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false;
}
@ -142,10 +187,9 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
}
if (lastEntryPos != NOT_A_DICT_POS) {
// Update has next flag in the last written entry.
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
false /* hasNext */);
if (!writeBigramEntry(&bigramEntry, writingPos)) {
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
return false;
}

View File

@ -18,6 +18,7 @@
#define LATINIME_BIGRAM_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@ -26,27 +27,27 @@ namespace latinime {
class BigramDictContent : public SparseTableDictContent {
public:
BigramDictContent(const char *const dictDirPath, const bool isUpdatable)
BigramDictContent(const char *const dictDirPath, const bool hasHistoricalInfo,
const bool isUpdatable)
: SparseTableDictContent(dictDirPath,
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
BigramDictContent()
BigramDictContent(const bool hasHistoricalInfo)
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {}
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
void getBigramEntry(int *const outProbability, bool *const outHasNext,
int *const outTargetTerminalId, const int bigramEntryPos) const {
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
int readingPos = bigramEntryPos;
getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId,
&readingPos);
return getBigramEntryAndAdvancePosition(&readingPos);
}
void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext,
int *const outTargetTerminalId, int *const bigramEntryPos) const;
const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
// Returns head position of bigram list for a PtNode specified by terminalId.
int getBigramListHeadPos(const int terminalId) const {
@ -57,15 +58,13 @@ class BigramDictContent : public SparseTableDictContent {
return addressLookupTable->get(terminalId);
}
bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId,
const int entryWritingPos) {
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
int writingPos = entryWritingPos;
return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
&writingPos);
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
}
bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext,
const int targetTerminalId, int *const entryWritingPos);
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
int *const entryWritingPos);
bool createNewBigramList(const int terminalId) {
const int bigramListPos = getContentBuffer()->getTailPosition();
@ -96,6 +95,8 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
int *const outEntryCount);
bool mHasHistoricalInfo;
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */

View File

@ -0,0 +1,104 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BIGRAM_ENTRY_H
#define LATINIME_BIGRAM_ENTRY_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
namespace latinime {
class BigramEntry {
public:
BigramEntry(const BigramEntry& bigramEntry)
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
mTimestamp(bigramEntry.mTimestamp), mLevel(bigramEntry.mLevel),
mCount(bigramEntry.mCount), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability),
mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0),
mTargetTerminalId(targetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability, const int timestamp, const int level,
const int count, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability), mTimestamp(timestamp),
mLevel(level), mCount(count), mTargetTerminalId(targetTerminalId) {}
const BigramEntry getInvalidatedEntry() const {
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
}
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
return BigramEntry(hasNext, mProbability, mTimestamp, mLevel, mCount,
mTargetTerminalId);
}
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
return BigramEntry(mHasNext, mProbability, mTimestamp, mLevel, mCount,
newTargetTerminalId);
}
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
return BigramEntry(mHasNext, probability, mTimestamp, mLevel, mCount,
mTargetTerminalId);
}
bool isValid() const {
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
}
bool hasNext() const {
return mHasNext;
}
int getProbability() const {
return mProbability;
}
int getTimeStamp() const {
return mTimestamp;
}
int getLevel() const {
return mLevel;
}
int getCount() const {
return mCount;
}
int getTargetTerminalId() const {
return mTargetTerminalId;
}
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
const bool mHasNext;
const int mProbability;
const int mTimestamp;
const int mLevel;
const int mCount;
const int mTargetTerminalId;
};
} // namespace latinime
#endif /* LATINIME_BIGRAM_ENTRY_H */

View File

@ -127,7 +127,7 @@ class Ver4DictBuffers {
// TODO: Quit using header size.
mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize),
mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
@ -137,7 +137,8 @@ class Ver4DictBuffers {
mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mTerminalPositionLookupTable(),
mProbabilityDictContent(false /* hasHistoricalInfo */),
mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {}
mBigramDictContent(false /* hasHistoricalInfo */), mShortcutDictContent(),
mIsUpdatable(true) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const int mHeaderSize;

View File

@ -56,8 +56,7 @@ class ExclusiveOwnershipPointer {
private:
// This class allows copying and assignment, and ensures that only one instance has ownership of
// the managed pointer.
ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {}
DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer);
void transferOwnership(const ExclusiveOwnershipPointer<T> *const src) {
if (*mSharedOwnerPtr != src) {