Avoid copying bigram list if possible.
Constructing en_US main dict using dicttool:
Before: real 1m8.699s, user 1m10.600s, sys 0m2.390s
After: real 0m17.204s, user 0m20.560s, sys 0m0.720s
Bug: 13406708
Change-Id: I3b0476be57e5cb93c6497025b3ffa7064ac326c6
This commit is contained in:
parent
8d8fb396a0
commit
ad518d9a5b
3 changed files with 66 additions and 41 deletions
|
@ -50,12 +50,18 @@ void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const out
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||||
const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
|
const int newProbability, const int timestamp, bool *const outAddedNewEntry) {
|
||||||
|
// 1. The word has no bigrams yet.
|
||||||
|
// 2. The word has bigrams, and there is the target in the list.
|
||||||
|
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
||||||
|
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
||||||
|
// 5. Same as 4, but the list is the last entry of the content file.
|
||||||
|
|
||||||
if (outAddedNewEntry) {
|
if (outAddedNewEntry) {
|
||||||
*outAddedNewEntry = false;
|
*outAddedNewEntry = false;
|
||||||
}
|
}
|
||||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
if (bigramListPos == NOT_A_DICT_POS) {
|
||||||
// Updating PtNode that doesn't have a bigram list.
|
// Case 1. PtNode that doesn't have a bigram list.
|
||||||
// Create new bigram list.
|
// Create new bigram list.
|
||||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -75,50 +81,55 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos);
|
int tailEntryPos = NOT_A_DICT_POS;
|
||||||
if (entryPosToUpdate != NOT_A_DICT_POS) {
|
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
||||||
// Overwrite existing entry.
|
&tailEntryPos);
|
||||||
const BigramEntry originalBigramEntry =
|
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
|
||||||
mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
// Case 4, 5.
|
||||||
if (!originalBigramEntry.isValid()) {
|
// Add new entry to the bigram list.
|
||||||
// Reuse invalid entry.
|
if (tailEntryPos == NOT_A_DICT_POS) {
|
||||||
if (outAddedNewEntry) {
|
// Case 4. Create new bigram list.
|
||||||
*outAddedNewEntry = true;
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
|
// Copy existing bigram list.
|
||||||
|
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const BigramEntry updatedBigramEntry =
|
// Write new entry at the tail position of the bigram content.
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
|
newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&updatedBigramEntry, newProbability, timestamp);
|
&newBigramEntry, newProbability, timestamp);
|
||||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update has next flag of the tail entry.
|
||||||
|
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (outAddedNewEntry) {
|
||||||
|
*outAddedNewEntry = true;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new entry to the bigram list.
|
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
|
||||||
// Create new bigram list.
|
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
||||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
if (!originalBigramEntry.isValid()) {
|
||||||
return false;
|
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
|
||||||
|
// entry is updated.
|
||||||
|
if (outAddedNewEntry) {
|
||||||
|
*outAddedNewEntry = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
const BigramEntry updatedBigramEntry =
|
||||||
int tailEntryPos = NOT_A_DICT_POS;
|
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
||||||
// Copy existing bigram list.
|
|
||||||
if (!mBigramDictContent->copyBigramList(bigramListPos, writingPos, &tailEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write new entry at the tail position of the bigram content.
|
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
|
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&newBigramEntry, newProbability, timestamp);
|
&updatedBigramEntry, newProbability, timestamp);
|
||||||
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Update has next flag of the tail entry.
|
|
||||||
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (outAddedNewEntry) {
|
|
||||||
*outAddedNewEntry = true;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
||||||
|
@ -127,7 +138,8 @@ bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTer
|
||||||
// Bigram list doesn't exist.
|
// Bigram list doesn't exist.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos);
|
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
|
||||||
|
nullptr /* outTailEntryPos */);
|
||||||
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
||||||
// Bigram entry doesn't exist.
|
// Bigram entry doesn't exist.
|
||||||
return false;
|
return false;
|
||||||
|
@ -212,7 +224,10 @@ int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
const int bigramListPos) const {
|
const int bigramListPos, int *const outTailEntryPos) const {
|
||||||
|
if (outTailEntryPos) {
|
||||||
|
*outTailEntryPos = NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
bool hasNext = true;
|
bool hasNext = true;
|
||||||
int invalidEntryPos = NOT_A_DICT_POS;
|
int invalidEntryPos = NOT_A_DICT_POS;
|
||||||
int readingPos = bigramListPos;
|
int readingPos = bigramListPos;
|
||||||
|
@ -228,6 +243,11 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
// Invalid entry that can be reused is found.
|
// Invalid entry that can be reused is found.
|
||||||
invalidEntryPos = entryPos;
|
invalidEntryPos = entryPos;
|
||||||
}
|
}
|
||||||
|
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
|
||||||
|
if (outTailEntryPos) {
|
||||||
|
*outTailEntryPos = entryPos;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return invalidEntryPos;
|
return invalidEntryPos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,7 +56,8 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
||||||
|
|
||||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const;
|
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
|
||||||
|
int *const outTailEntryPos) const;
|
||||||
|
|
||||||
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
||||||
const int newProbability, const int timestamp) const;
|
const int newProbability, const int timestamp) const;
|
||||||
|
|
|
@ -88,6 +88,10 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
const BigramDictContent *const originalBigramDictContent,
|
const BigramDictContent *const originalBigramDictContent,
|
||||||
int *const outBigramEntryCount);
|
int *const outBigramEntryCount);
|
||||||
|
|
||||||
|
// Returns true when pos is equal to the tail position reported by the content buffer.
bool isContentTailPos(const int pos) const {
|
||||||
|
return pos == getContentBuffer()->getTailPosition();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue