Avoid copying bigram list if possible.

Time taken to construct the en_US main dict using dicttool:
Before:
real    1m8.699s
user    1m10.600s
sys     0m2.390s
After:
real    0m17.204s
user    0m20.560s
sys     0m0.720s


Bug: 13406708
Change-Id: I3b0476be57e5cb93c6497025b3ffa7064ac326c6
This commit is contained in:
Keisuke Kuroyanagi 2014-05-08 14:19:33 +09:00
parent 8d8fb396a0
commit ad518d9a5b
3 changed files with 66 additions and 41 deletions

View file

@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const int newProbability, const int timestamp, bool *const outAddedNewEntry) { const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
// 4. The word has bigrams. We have to append new bigram entry to the list.
// 5. Same as 4, but the list is the last entry of the content file.
if (outAddedNewEntry) { if (outAddedNewEntry) {
*outAddedNewEntry = false; *outAddedNewEntry = false;
} }
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) { if (bigramListPos == NOT_A_DICT_POS) {
// Updating PtNode that doesn't have a bigram list. // Case 1. PtNode that doesn't have a bigram list.
// Create new bigram list. // Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) { if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false; return false;
@ -75,50 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
return true; return true;
} }
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); int tailEntryPos = NOT_A_DICT_POS;
if (entryPosToUpdate != NOT_A_DICT_POS) { const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
// Overwrite existing entry. &tailEntryPos);
const BigramEntry originalBigramEntry = if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
mBigramDictContent->getBigramEntry(entryPosToUpdate); // Case 4, 5.
if (!originalBigramEntry.isValid()) { // Add new entry to the bigram list.
// Reuse invalid entry. if (tailEntryPos == NOT_A_DICT_POS) {
if (outAddedNewEntry) { // Case 4. Create new bigram list.
*outAddedNewEntry = true; if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
// Copy existing bigram list.
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
return false;
} }
} }
const BigramEntry updatedBigramEntry = // Write new entry at the tail position of the bigram content.
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, newProbability, timestamp); &newBigramEntry, newProbability, timestamp);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false;
}
// Update has next flag of the tail entry.
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
} }
// Add new entry to the bigram list. // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
// Create new bigram list. const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (!mBigramDictContent->createNewBigramList(terminalId)) { if (!originalBigramEntry.isValid()) {
return false; // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
// entry is updated.
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
} }
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); const BigramEntry updatedBigramEntry =
int tailEntryPos = NOT_A_DICT_POS; originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
// Copy existing bigram list.
if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) {
return false;
}
// Write new entry at the tail position of the bigram content.
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, newProbability, timestamp); &updatedBigramEntry, newProbability, timestamp);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
return false;
}
// Update has next flag of the tail entry.
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
} }
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
@ -127,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
// Bigram list doesn't exist. // Bigram list doesn't exist.
return false; return false;
} }
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos); const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
nullptr /* outTailEntryPos */);
if (entryPosToUpdate == NOT_A_DICT_POS) { if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist. // Bigram entry doesn't exist.
return false; return false;
@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
} }
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos) const { const int bigramListPos, int *const outTailEntryPos) const {
if (outTailEntryPos) {
*outTailEntryPos = NOT_A_DICT_POS;
}
bool hasNext = true; bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS; int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos; int readingPos = bigramListPos;
@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
// Invalid entry that can be reused is found. // Invalid entry that can be reused is found.
invalidEntryPos = entryPos; invalidEntryPos = entryPos;
} }
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
if (outTailEntryPos) {
*outTailEntryPos = entryPos;
}
}
} }
return invalidEntryPos; return invalidEntryPos;
} }

View file

@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const int newProbability, const int timestamp) const; const int newProbability, const int timestamp) const;

View file

@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent {
const BigramDictContent *const originalBigramDictContent, const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount); int *const outBigramEntryCount);
// Reports whether the given position coincides with the tail position of the
// content buffer returned by getContentBuffer().
bool isContentTailPos(const int pos) const {
    const auto tailPos = getContentBuffer()->getTailPosition();
    return tailPos == pos;
}
private: private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent); DISALLOW_COPY_AND_ASSIGN(BigramDictContent);