From ad518d9a5beccc7c322299bc60eeff2ebe944bf1 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 8 May 2014 14:19:33 +0900 Subject: [PATCH] Avoid copying bigram list if possible. Constructing en_US main dict using dicttool: Before: real 1m8.699s user 1m10.600s sys 0m2.390s After: real 0m17.204s user 0m20.560s sys 0m0.720s Bug: 13406708 Change-Id: I3b0476be57e5cb93c6497025b3ffa7064ac326c6 --- .../bigram/ver4_bigram_list_policy.cpp | 100 +++++++++++------- .../bigram/ver4_bigram_list_policy.h | 3 +- .../v4/content/bigram_dict_content.h | 4 + 3 files changed, 66 insertions(+), 41 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp index 04e768fbd..4975512ff 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, const int timestamp, bool *const outAddedNewEntry) { + // 1. The word has no bigrams yet. + // 2. The word has bigrams, and there is the target in the list. + // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. + // 4. The word has bigrams. We have to append new bigram entry to the list. + // 5. Same as 4, but the list is the last entry of the content file. + if (outAddedNewEntry) { *outAddedNewEntry = false; } const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); if (bigramListPos == NOT_A_DICT_POS) { - // Updating PtNode that doesn't have a bigram list. + // Case 1. PtNode that doesn't have a bigram list. // Create new bigram list. if (!mBigramDictContent->createNewBigramList(terminalId)) { return false; @@ -75,50 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget return true; } - const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); - if (entryPosToUpdate != NOT_A_DICT_POS) { - // Overwrite existing entry. - const BigramEntry originalBigramEntry = - mBigramDictContent->getBigramEntry(entryPosToUpdate); - if (!originalBigramEntry.isValid()) { - // Reuse invalid entry. - if (outAddedNewEntry) { - *outAddedNewEntry = true; + int tailEntryPos = NOT_A_DICT_POS; + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, + &tailEntryPos); + if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { + // Case 4, 5. + // Add new entry to the bigram list. + if (tailEntryPos == NOT_A_DICT_POS) { + // Case 4. Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); + // Copy existing bigram list. + if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { + return false; } } - const BigramEntry updatedBigramEntry = - originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); + // Write new entry at the tail position of the bigram content. + const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &updatedBigramEntry, newProbability, timestamp); - return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); + &newBigramEntry, newProbability, timestamp); + if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { + return false; + } + // Update has next flag of the tail entry. + if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + return true; } - // Add new entry to the bigram list. - // Create new bigram list. - if (!mBigramDictContent->createNewBigramList(terminalId)) { - return false; + // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. + const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (!originalBigramEntry.isValid()) { + // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing + // entry is updated. + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } } - int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); - int tailEntryPos = NOT_A_DICT_POS; - // Copy existing bigram list. - if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) { - return false; - } - // Write new entry at the tail position of the bigram content. - const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId); + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( - &newBigramEntry, newProbability, timestamp); - if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { - return false; - } - // Update has next flag of the tail entry. - if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { - return false; - } - if (outAddedNewEntry) { - *outAddedNewEntry = true; - } - return true; + &updatedBigramEntry, newProbability, timestamp); + return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); } bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { @@ -127,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer // Bigram list doesn't exist. return false; } - const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, + nullptr /* outTailEntryPos */); if (entryPosToUpdate == NOT_A_DICT_POS) { // Bigram entry doesn't exist. return false; @@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { } int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, - const int bigramListPos) const { + const int bigramListPos, int *const outTailEntryPos) const { + if (outTailEntryPos) { + *outTailEntryPos = NOT_A_DICT_POS; + } bool hasNext = true; int invalidEntryPos = NOT_A_DICT_POS; int readingPos = bigramListPos; @@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, // Invalid entry that can be reused is found. invalidEntryPos = entryPos; } + if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { + if (outTailEntryPos) { + *outTailEntryPos = entryPos; + } + } } return invalidEntryPos; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index d8f7be631..c1f33359b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos, + int *const outTailEntryPos) const; const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, const int newProbability, const int timestamp) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 40ece7636..944e0f9e2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent { const BigramDictContent *const originalBigramDictContent, int *const outBigramEntryCount); + bool isContentTailPos(const int pos) const { + return pos == getContentBuffer()->getTailPosition(); + } + private: DISALLOW_COPY_AND_ASSIGN(BigramDictContent);