From 804f7450fc94ad47c2a12ec9c1183a244f3f1a17 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 7 Jul 2014 21:09:25 +0900 Subject: [PATCH] Use linked list for bigram list. BinaryDictionaryTests for VERSION4_DEV: Before Time: 36.461 After Time: 33.031 Bug: 14425059 Change-Id: I9ca2714f450f61f713df6ebd34c953dece991cdb --- .../v4/bigram/ver4_bigram_list_policy.cpp | 72 +++++++------- .../v4/bigram/ver4_bigram_list_policy.h | 2 - .../v4/content/bigram_dict_content.cpp | 98 ++++++++++--------- .../v4/content/bigram_dict_content.h | 33 ++++--- .../structure/v4/ver4_dict_constants.cpp | 2 +- .../structure/v4/ver4_dict_constants.h | 2 +- 6 files changed, 112 insertions(+), 97 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp index 7a52fd180..146cab6c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp @@ -71,8 +71,14 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, bigramProperty); // Write an entry. - const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, + &writingPos)) { + AKLOGE("Cannot write bigram entry. pos: %d.", writingPos); + return false; + } + if (!mBigramDictContent->writeTerminator(writingPos)) { + AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos); return false; } if (outAddedNewEntry) { @@ -84,32 +90,37 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget int tailEntryPos = NOT_A_DICT_POS; const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, &tailEntryPos); - if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { - // Case 4, 5. - // Add new entry to the bigram list. - if (tailEntryPos == NOT_A_DICT_POS) { - // Case 4. Create new bigram list. - if (!mBigramDictContent->createNewBigramList(terminalId)) { - return false; - } - const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); - // Copy existing bigram list. - if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { - return false; - } - } + if (entryPosToUpdate == NOT_A_DICT_POS) { + // Case 4, 5. Add new entry to the bigram list. + const int contentTailPos = mBigramDictContent->getContentTailPos(); + // If the tail entry is at the tail of content buffer, the new entry can be written without + // link (Case 5). + const bool canAppendEntry = + contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize(); + const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos; + int writingPos = newEntryPos; // Write new entry at the tail position of the bigram content. const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( &newBigramEntry, bigramProperty); - if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite, + &writingPos)) { + AKLOGE("Cannot write bigram entry. pos: %d.", writingPos); return false; } - // Update has next flag of the tail entry. - if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { + if (!mBigramDictContent->writeTerminator(writingPos)) { + AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos); return false; } + if (!canAppendEntry) { + // Update link of the current tail entry. + if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) { + AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.", + tailEntryPos, newEntryPos); + return false; + } + } if (outAddedNewEntry) { *outAddedNewEntry = true; } @@ -228,14 +239,18 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, if (outTailEntryPos) { *outTailEntryPos = NOT_A_DICT_POS; } - bool hasNext = true; int invalidEntryPos = NOT_A_DICT_POS; int readingPos = bigramListPos; - while (hasNext) { - const int entryPos = readingPos; + while (true) { const BigramEntry bigramEntry = mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); + const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize(); + if (!bigramEntry.hasNext()) { + if (outTailEntryPos) { + *outTailEntryPos = entryPos; + } + break; + } if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { // Entry with same target is found. return entryPos; @@ -243,11 +258,6 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, // Invalid entry that can be reused is found. invalidEntryPos = entryPos; } - if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { - if (outTailEntryPos) { - *outTailEntryPos = entryPos; - } - } } return invalidEntryPos; } @@ -269,10 +279,4 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( } } -bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) { - const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos); - const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext); - return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); -} - } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h index 1613941c4..55ba613a5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h @@ -63,8 +63,6 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, const BigramProperty *const bigramProperty) const; - bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); - BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; const HeaderPolicy *const mHeaderPolicy; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index e1ceaee49..d7e1952b5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -20,6 +20,8 @@ namespace latinime { +const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID; + const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( int *const bigramEntryPos) const { const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); @@ -34,7 +36,7 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( } const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0; int probability = NOT_A_PROBABILITY; int timestamp = NOT_A_TIMESTAMP; int level = 0; @@ -55,81 +57,90 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( const int targetTerminalId = (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; + if (isLink) { + const int linkedEntryPos = targetTerminalId; + if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) { + // Bigram list terminator is found. + return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + Ver4DictConstants::NOT_A_TERMINAL_ID); + } + *bigramEntryPos = linkedEntryPos; + return getBigramEntryAndAdvancePosition(bigramEntryPos); + } + // hasNext is always true because we should continue to read the next entry until the terminator + // is found. if (mHasHistoricalInfo) { const HistoricalInfo historicalInfo(timestamp, level, count); - return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId); + return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId); } else { - return BigramEntry(hasNext, probability, targetTerminalId); + return BigramEntry(true /* hasNext */, probability, targetTerminalId); } } bool BigramDictContent::writeBigramEntryAndAdvancePosition( const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { + return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */, + bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(), + bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(), + bigramEntryToWrite->getHistoricalInfo()->getLevel(), + bigramEntryToWrite->getHistoricalInfo()->getCount(), + entryWritingPos); +} + +bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition( + const bool isLink, const int probability, const int targetTerminalId, + const int timestamp, const int level, const int count, int *const entryWritingPos) { BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); - const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext()); + const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0; if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); return false; } if (mHasHistoricalInfo) { - const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), + if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp, Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, - historicalInfo->getTimeStamp()); + timestamp); return false; } - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(), + if (!bigramListBuffer->writeUintAndAdvancePosition(level, Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos, - historicalInfo->getLevel()); + level); return false; } - if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(), + if (!bigramListBuffer->writeUintAndAdvancePosition(count, Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, - historicalInfo->getCount()); + count); return false; } } else { - if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), + if (!bigramListBuffer->writeUintAndAdvancePosition(probability, Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, - bigramEntryToWrite->getProbability()); + probability); return false; } } - const int targetTerminalIdToWrite = - (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? - Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : - bigramEntryToWrite->getTargetTerminalId(); + const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) { AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d", - *entryWritingPos, bigramEntryToWrite->getTargetTerminalId()); + *entryWritingPos, targetTerminalId); return false; } return true; } -bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos, - int *const outTailEntryPos) { - int readingPos = bigramListPos; - int writingPos = toPos; - bool hasNext = true; - while (hasNext) { - const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos); - hasNext = bigramEntry.hasNext(); - if (!hasNext) { - *outTailEntryPos = writingPos; - } - if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) { - AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); - return false; - } - } - return true; +bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) { + const int targetTerminalId = linkedEntryPos; + int pos = writingPos; + return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */, + NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */, + 0 /* count */, &pos); } bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, @@ -171,16 +182,15 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap * bool BigramDictContent::runGCBigramList(const int bigramListPos, const BigramDictContent *const sourceBigramDictContent, const int toPos, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, - int *const outEntrycount) { + int *const outEntryCount) { bool hasNext = true; int readingPos = bigramListPos; int writingPos = toPos; - int lastEntryPos = NOT_A_DICT_POS; while (hasNext) { const BigramEntry originalBigramEntry = sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); hasNext = originalBigramEntry.hasNext(); - if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) { + if (!originalBigramEntry.isValid()) { continue; } TerminalPositionLookupTable::TerminalIdMap::const_iterator it = @@ -189,21 +199,17 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, // Target word has been removed. continue; } - lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS; const BigramEntry updatedBigramEntry = originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second); if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) { AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); return false; } - *outEntrycount += 1; + *outEntryCount += 1; } - if (lastEntryPos != NOT_A_DICT_POS) { - // Update has next flag in the last written entry. - const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry( - false /* hasNext */); - if (!writeBigramEntry(&bigramEntry, lastEntryPos)) { - AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos); + if (*outEntryCount > 0) { + if (!writeTerminator(writingPos)) { + AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos); return false; } } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 52447a336..033f18e9e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -42,6 +42,10 @@ class BigramDictContent : public SparseTableDictContent { Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), mHasHistoricalInfo(hasHistoricalInfo) {} + int getContentTailPos() const { + return getContentBuffer()->getTailPosition(); + } + const BigramEntry getBigramEntry(const int bigramEntryPos) const { int readingPos = bigramEntryPos; return getBigramEntryAndAdvancePosition(&readingPos); @@ -71,13 +75,18 @@ class BigramDictContent : public SparseTableDictContent { bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos); + bool writeTerminator(const int writingPos) { + // Terminator is a link to the invalid position. + return writeLink(INVALID_LINKED_ENTRY_POS, writingPos); + } + + bool writeLink(const int linkedPos, const int writingPos); + bool createNewBigramList(const int terminalId) { const int bigramListPos = getContentBuffer()->getTailPosition(); return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); } - bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos); - bool flushToFile(const char *const dictPath) const { return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, @@ -88,17 +97,6 @@ class BigramDictContent : public SparseTableDictContent { const BigramDictContent *const originalBigramDictContent, int *const outBigramEntryCount); - bool isContentTailPos(const int pos) const { - return pos == getContentBuffer()->getTailPosition(); - } - - private: - DISALLOW_COPY_AND_ASSIGN(BigramDictContent); - - int createAndGetBigramFlags(const bool hasNext) const { - return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0; - } - int getBigramEntrySize() const { if (mHasHistoricalInfo) { return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE @@ -113,6 +111,15 @@ class BigramDictContent : public SparseTableDictContent { } } + private: + DISALLOW_COPY_AND_ASSIGN(BigramDictContent); + + static const int INVALID_LINKED_ENTRY_POS; + + bool writeBigramEntryAttributesAndAdvancePosition( + const bool isLink, const int probability, const int targetTerminalId, + const int timestamp, const int level, const int count, int *const entryWritingPos); + bool runGCBigramList(const int bigramListPos, const BigramDictContent *const sourceBigramDictContent, const int toPos, const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index deed010cd..345cabbf9 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -60,7 +60,7 @@ const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; -const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; +const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80; const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1; const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index d6d22c5c1..b4effca9c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -57,8 +57,8 @@ class Ver4DictConstants { static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; + static const int BIGRAM_IS_LINK_MASK; static const int BIGRAM_PROBABILITY_MASK; - static const int BIGRAM_HAS_NEXT_MASK; // Used when bigram list has time stamp. static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;