From 69e6165d2eea707ab6ba2d5b2bfd1a959b412984 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 2 Dec 2013 19:45:37 +0900 Subject: [PATCH] Extend bigram probability field to support historical info. Bug: 11073222 Change-Id: I020520251629c5a7c5b5fac21108392c8c2a38b6 --- native/jni/src/defines.h | 15 ++- .../suggest/core/dictionary/bloom_filter.h | 2 + .../bigram/ver4_bigram_list_policy.cpp | 126 ++++++++++-------- .../bigram/ver4_bigram_list_policy.h | 4 +- .../v4/content/bigram_dict_content.cpp | 120 +++++++++++------ .../v4/content/bigram_dict_content.h | 33 ++--- .../structure/v4/content/bigram_entry.h | 104 +++++++++++++++ .../structure/v4/ver4_dict_buffers.h | 5 +- .../src/utils/exclusive_ownership_pointer.h | 3 +- 9 files changed, 291 insertions(+), 121 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h index fbcd612b7..564811560 100644 --- a/native/jni/src/defines.h +++ b/native/jni/src/defines.h @@ -341,12 +341,21 @@ template AK_FORCE_INLINE const T &max(const T &a, const T &b) { retu #define INPUTLENGTH_FOR_DEBUG (-1) #define MIN_OUTPUT_INDEX_FOR_DEBUG (-1) -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ +#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \ + TypeName() + +#define DISALLOW_COPY_CONSTRUCTOR(TypeName) \ + TypeName(const TypeName&) + +#define DISALLOW_ASSIGNMENT_OPERATOR(TypeName) \ void operator=(const TypeName&) +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + DISALLOW_COPY_CONSTRUCTOR(TypeName); \ + DISALLOW_ASSIGNMENT_OPERATOR(TypeName) + #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - TypeName(); \ + DISALLOW_DEFAULT_CONSTRUCTOR(TypeName); \ DISALLOW_COPY_AND_ASSIGN(TypeName) // Used as a return value for character comparison diff --git a/native/jni/src/suggest/core/dictionary/bloom_filter.h b/native/jni/src/suggest/core/dictionary/bloom_filter.h index 5205456a8..5f9700486 
100644 --- a/native/jni/src/suggest/core/dictionary/bloom_filter.h +++ b/native/jni/src/suggest/core/dictionary/bloom_filter.h @@ -50,6 +50,8 @@ class BloomFilter { } private: + DISALLOW_ASSIGNMENT_OPERATOR(BloomFilter); + // Size, in bytes, of the bloom filter index for bigrams // 128 gives us 1024 buckets. The probability of false positive is (1 - e ** (-kn/m))**k, // where k is the number of hash functions, n the number of bigrams, and m the number of diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 7160f6f48..48ddb2ff4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -26,12 +26,18 @@ namespace latinime { void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext, int *const bigramEntryPos) const { - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext, - &targetTerminalId, bigramEntryPos); + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); if (outBigramPos) { // Lookup target PtNode position. 
- *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( + bigramEntry.getTargetTerminalId()); + } + if (outProbability) { + *outProbability = bigramEntry.getProbability(); + } + if (outHasNext) { + *outHasNext = bigramEntry.hasNext(); } } @@ -47,12 +53,13 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget if (!mBigramDictContent->createNewBigramList(terminalId)) { return false; } - const int probabilityToWrite = getUpdatedProbability( - NOT_A_PROBABILITY /* originalProbability */, newProbability); + const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + newTargetTerminalId); + const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry(&newBigramEntry, + newProbability, timestamp); // Write an entry. const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (!mBigramDictContent->writeBigramEntry(probabilityToWrite, false /* hasNext */, - newTargetTerminalId, writingPos)) { + if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { return false; } if (outAddedNewEntry) { @@ -64,18 +71,19 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); if (entryPosToUpdate != NOT_A_DICT_POS) { // Overwrite existing entry. 
- bool hasNext = false; - int probability = NOT_A_PROBABILITY; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntry(&probability, &hasNext, &targetTerminalId, - entryPosToUpdate); - const int probabilityToWrite = getUpdatedProbability(probability, newProbability); - if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) { + const BigramEntry originalBigramEntry = + mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (!originalBigramEntry.isValid()) { // Reuse invalid entry. - *outAddedNewEntry = true; + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } } - return mBigramDictContent->writeBigramEntry(probabilityToWrite, hasNext, - newTargetTerminalId, entryPosToUpdate); + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); + const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry( + &updatedBigramEntry, newProbability, timestamp); + return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } // Add new entry to the bigram list. @@ -85,10 +93,10 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget } // Write new entry at a head position of the bigram list. 
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - const int probabilityToWrite = getUpdatedProbability( - NOT_A_PROBABILITY /* originalProbability */, newProbability); - if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(probabilityToWrite, - true /* hasNext */, newTargetTerminalId, &writingPos)) { + const BigramEntry newBigramEntry(true /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); + const BigramEntry bigramEntryToWrite = getUpdatedBigramEntry( + &newBigramEntry, newProbability, timestamp); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, &writingPos)) { return false; } if (outAddedNewEntry) { @@ -109,18 +117,14 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer // Bigram entry doesn't exist. return false; } - bool hasNext = false; - int probability = NOT_A_PROBABILITY; - int originalTargetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntry(&probability, &hasNext, &originalTargetTerminalId, - entryPosToUpdate); - if (targetTerminalId != originalTargetTerminalId) { + const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (targetTerminalId != bigramEntry.getTargetTerminalId()) { // Bigram entry doesn't exist. return false; } - // Remove bigram entry by overwriting target terminal Id. - return mBigramDictContent->writeBigramEntry(probability, hasNext, - Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPosToUpdate); + // Remove bigram entry by marking it as invalid entry and overwriting the original entry. 
+ const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); } bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, @@ -134,34 +138,35 @@ bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const i int readingPos = bigramListPos; while (hasNext) { const int entryPos = readingPos; - int probability = NOT_A_PROBABILITY; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, - &targetTerminalId, &readingPos); - if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (!bigramEntry.isValid()) { continue; } const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( - targetTerminalId); + bigramEntry.getTargetTerminalId()); if (targetPtNodePos == NOT_A_DICT_POS) { // Invalidate bigram entry. - if (!mBigramDictContent->writeBigramEntry(probability, hasNext, - Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { + const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { return false; } } else if (mNeedsToDecayWhenUpdating) { - probability = ForgettingCurveUtils::getEncodedProbabilityToSave( - probability, mHeaderPolicy); + // TODO: Quit decaying probability during GC. 
+ const int probability = ForgettingCurveUtils::getEncodedProbabilityToSave( + bigramEntry.getProbability(), mHeaderPolicy); if (ForgettingCurveUtils::isValidEncodedProbability(probability)) { - if (!mBigramDictContent->writeBigramEntry(probability, hasNext, targetTerminalId, - entryPos)) { + const BigramEntry updatedBigramEntry = + bigramEntry.updateProbabilityAndGetEntry(probability); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { return false; } *outBigramCount += 1; } else { // Remove entry. - if (!mBigramDictContent->writeBigramEntry(probability, hasNext, - Ver4DictConstants::NOT_A_TERMINAL_ID /* targetTerminalId */, entryPos)) { + const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { return false; } } @@ -182,10 +187,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { bool hasNext = true; int readingPos = bigramListPos; while (hasNext) { - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, - &targetTerminalId, &readingPos); - if (targetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID) { + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (bigramEntry.isValid()) { bigramCount++; } } @@ -199,13 +204,13 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, int readingPos = bigramListPos; while (hasNext) { const int entryPos = readingPos; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, - &targetTerminalId, &readingPos); - if (targetTerminalId == targetTerminalIdToFind) { + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = 
bigramEntry.hasNext(); + if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { // Entry with same target is found. return entryPos; - } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + } else if (!bigramEntry.isValid()) { // Invalid entry that can be reused is found. invalidEntryPos = entryPos; } @@ -213,13 +218,16 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, return invalidEntryPos; } -int Ver4BigramListPolicy::getUpdatedProbability(const int originalProbability, - const int newProbability) const { +const BigramEntry Ver4BigramListPolicy::getUpdatedBigramEntry( + const BigramEntry *const originalBigramEntry, const int newProbability, + const int timestamp) const { if (mNeedsToDecayWhenUpdating) { - return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, - newProbability); + // TODO: Update historical information. + const int probability = ForgettingCurveUtils::getUpdatedEncodedProbability( + originalBigramEntry->getProbability(), newProbability); + return originalBigramEntry->updateProbabilityAndGetEntry(probability); } else { - return newProbability; + return originalBigramEntry->updateProbabilityAndGetEntry(newProbability); } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index c0959118e..e718645b4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" namespace latinime { @@ -58,7 +59,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) 
const; - int getUpdatedProbability(const int originalProbability, const int newProbability) const; + const BigramEntry getUpdatedBigramEntry(const BigramEntry *const originalBigramEntry, + const int newProbability, const int timestamp) const; BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index 431f342ae..2a783543a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -20,53 +20,98 @@ namespace latinime { -void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability, - bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const { +const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( + int *const bigramEntryPos) const { const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - if (outProbability) { - *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + int probability = NOT_A_PROBABILITY; + int timestamp = Ver4DictConstants::NOT_A_TIME_STAMP; + int level = 0; + int count = 0; + if (mHasHistoricalInfo) { + probability = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); + timestamp = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); + level = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, 
bigramEntryPos); + count = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); + } else { + probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; } - if (outHasNext) { - *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; - } - const int targetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); - if (outTargetTerminalId) { - *outTargetTerminalId = - (targetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? - Ver4DictConstants::NOT_A_TERMINAL_ID : targetTerminalId; + const int targetTerminalId = + (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? + Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; + if (mHasHistoricalInfo) { + return BigramEntry(hasNext, probability, timestamp, level, count, targetTerminalId); + } else { + return BigramEntry(hasNext, probability, targetTerminalId); } } -bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, - const int targetTerminalId, int *const entryWritingPos) { +bool BigramDictContent::writeBigramEntryAndAdvancePosition( + const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); - const int bigramFlags = createAndGetBigramFlags(probability, hasNext); + const int bigramFlags = createAndGetBigramFlags( + mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(), + bigramEntryToWrite->hasNext()); if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram flags. 
pos: %d, flags: %x", *entryWritingPos, bigramFlags); return false; } + if (mHasHistoricalInfo) { + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), + Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, + bigramEntryToWrite->getProbability()); + return false; + } + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getTimeStamp(), + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, + bigramEntryToWrite->getTimeStamp()); + return false; + } + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getLevel(), + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos, + bigramEntryToWrite->getLevel()); + return false; + } + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getCount(), + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, + bigramEntryToWrite->getCount()); + return false; + } + } const int targetTerminalIdToWrite = - (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? - Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; - return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); + (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : + bigramEntryToWrite->getTargetTerminalId(); + if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram target terminal id. 
pos: %d, target terminal id: %d", + *entryWritingPos, bigramEntryToWrite->getTargetTerminalId()); + return false; + } + return true; } bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { - bool hasNext = true; int readingPos = bigramListPos; int writingPos = toPos; + bool hasNext = true; while (hasNext) { - int probability = NOT_A_PROBABILITY; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, - &readingPos); - if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, - &writingPos)) { + const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) { AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); return false; } @@ -119,22 +164,22 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, int writingPos = toPos; int lastEntryPos = NOT_A_DICT_POS; while (hasNext) { - int probability = NOT_A_PROBABILITY; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - sourceBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, - &targetTerminalId, &readingPos); - if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + const BigramEntry originalBigramEntry = + sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = originalBigramEntry.hasNext(); + if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) { continue; } TerminalPositionLookupTable::TerminalIdMap::const_iterator it = - terminalIdMap->find(targetTerminalId); + terminalIdMap->find(originalBigramEntry.getTargetTerminalId()); if (it == terminalIdMap->end()) { // Target word has been removed. continue; } lastEntryPos = hasNext ? 
writingPos : NOT_A_DICT_POS; - if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second, - &writingPos)) { + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second); + if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) { AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); return false; } @@ -142,10 +187,9 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, } if (lastEntryPos != NOT_A_DICT_POS) { // Update has next flag in the last written entry. - int probability = NOT_A_PROBABILITY; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos); - if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) { + const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry( + false /* hasNext */); + if (!writeBigramEntry(&bigramEntry, lastEntryPos)) { AKLOGE("Cannot write bigram entry to set hasNext flag after GC. 
pos: %d", writingPos); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index cf380f403..ac05b215b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -18,6 +18,7 @@ #define LATINIME_BIGRAM_DICT_CONTENT_H #include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" @@ -26,27 +27,27 @@ namespace latinime { class BigramDictContent : public SparseTableDictContent { public: - BigramDictContent(const char *const dictDirPath, const bool isUpdatable) + BigramDictContent(const char *const dictDirPath, const bool hasHistoricalInfo, + const bool isUpdatable) : SparseTableDictContent(dictDirPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mHasHistoricalInfo(hasHistoricalInfo) {} - BigramDictContent() + BigramDictContent(const bool hasHistoricalInfo) : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, - Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mHasHistoricalInfo(hasHistoricalInfo) {} - void getBigramEntry(int *const outProbability, bool *const outHasNext, - int *const outTargetTerminalId, const int 
bigramEntryPos) const { + const BigramEntry getBigramEntry(const int bigramEntryPos) const { int readingPos = bigramEntryPos; - getBigramEntryAndAdvancePosition(outProbability, outHasNext, outTargetTerminalId, - &readingPos); + return getBigramEntryAndAdvancePosition(&readingPos); } - void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, - int *const outTargetTerminalId, int *const bigramEntryPos) const; + const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; // Returns head position of bigram list for a PtNode specified by terminalId. int getBigramListHeadPos(const int terminalId) const { @@ -57,15 +58,13 @@ class BigramDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } - bool writeBigramEntry(const int probability, const int hasNext, const int targetTerminalId, - const int entryWritingPos) { + bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { int writingPos = entryWritingPos; - return writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, - &writingPos); + return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); } - bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, - const int targetTerminalId, int *const entryWritingPos); + bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, + int *const entryWritingPos); bool createNewBigramList(const int terminalId) { const int bigramListPos = getContentBuffer()->getTailPosition(); @@ -96,6 +95,8 @@ class BigramDictContent : public SparseTableDictContent { const BigramDictContent *const sourceBigramDictContent, const int toPos, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, int *const outEntryCount); + + bool mHasHistoricalInfo; }; } // namespace latinime #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git 
a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h new file mode 100644 index 000000000..10b3ec8dc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BIGRAM_ENTRY_H +#define LATINIME_BIGRAM_ENTRY_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +class BigramEntry { + public: + BigramEntry(const BigramEntry& bigramEntry) + : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability), + mTimestamp(bigramEntry.mTimestamp), mLevel(bigramEntry.mLevel), + mCount(bigramEntry.mCount), mTargetTerminalId(bigramEntry.mTargetTerminalId) {} + + // Entry without historical information. + BigramEntry(const bool hasNext, const int probability, const int targetTerminalId) + : mHasNext(hasNext), mProbability(probability), + mTimestamp(Ver4DictConstants::NOT_A_TIME_STAMP), mLevel(0), mCount(0), + mTargetTerminalId(targetTerminalId) {} + + // Entry with historical information. 
+ BigramEntry(const bool hasNext, const int probability, const int timestamp, const int level, + const int count, const int targetTerminalId) + : mHasNext(hasNext), mProbability(probability), mTimestamp(timestamp), + mLevel(level), mCount(count), mTargetTerminalId(targetTerminalId) {} + + const BigramEntry getInvalidatedEntry() const { + return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID); + } + + const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const { + return BigramEntry(hasNext, mProbability, mTimestamp, mLevel, mCount, + mTargetTerminalId); + } + + const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const { + return BigramEntry(mHasNext, mProbability, mTimestamp, mLevel, mCount, + newTargetTerminalId); + } + + const BigramEntry updateProbabilityAndGetEntry(const int probability) const { + return BigramEntry(mHasNext, probability, mTimestamp, mLevel, mCount, + mTargetTerminalId); + } + + bool isValid() const { + return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; + } + + bool hasNext() const { + return mHasNext; + } + + int getProbability() const { + return mProbability; + } + + int getTimeStamp() const { + return mTimestamp; + } + + int getLevel() const { + return mLevel; + } + + int getCount() const { + return mCount; + } + + int getTargetTerminalId() const { + return mTargetTerminalId; + } + + private: + // Copy constructor is public to use this class as a type of return value. 
+ DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry); + DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry); + + const bool mHasNext; + const int mProbability; + const int mTimestamp; + const int mLevel; + const int mCount; + const int mTargetTerminalId; +}; +} // namespace latinime +#endif /* LATINIME_BIGRAM_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 8fdbbedfe..07b8f181d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -127,7 +127,7 @@ class Ver4DictBuffers { // TODO: Quit using header size. mTerminalPositionLookupTable(dictDirPath, isUpdatable, mHeaderSize), mProbabilityDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable), - mBigramDictContent(dictDirPath, isUpdatable), + mBigramDictContent(dictDirPath, false /* hasHistoricalInfo */, isUpdatable), mShortcutDictContent(dictDirPath, isUpdatable), mIsUpdatable(isUpdatable) {} @@ -137,7 +137,8 @@ class Ver4DictBuffers { mExpandableTrieBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mTerminalPositionLookupTable(), mProbabilityDictContent(false /* hasHistoricalInfo */), - mBigramDictContent(), mShortcutDictContent(), mIsUpdatable(true) {} + mBigramDictContent(false /* hasHistoricalInfo */), mShortcutDictContent(), + mIsUpdatable(true) {} const MmappedBuffer::MmappedBufferPtr mDictBuffer; const int mHeaderSize; diff --git a/native/jni/src/utils/exclusive_ownership_pointer.h b/native/jni/src/utils/exclusive_ownership_pointer.h index 3cf78954a..6c67df28e 100644 --- a/native/jni/src/utils/exclusive_ownership_pointer.h +++ b/native/jni/src/utils/exclusive_ownership_pointer.h @@ -56,8 +56,7 @@ class ExclusiveOwnershipPointer { private: // This class allows to copy and assign and ensures only one instance has the ownership of the // managed pointer. 
- - ExclusiveOwnershipPointer() : mPointer(0), mSharedOwnerPtr(0) {} + DISALLOW_DEFAULT_CONSTRUCTOR(ExclusiveOwnershipPointer); void transferOwnership(const ExclusiveOwnershipPointer *const src) { if (*mSharedOwnerPtr != src) {