Merge "Use linked list for bigram list."
commit
2ebb244c91
|
@ -71,8 +71,14 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
||||||
bigramProperty);
|
bigramProperty);
|
||||||
// Write an entry.
|
// Write an entry.
|
||||||
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||||
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
|
||||||
|
&writingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!mBigramDictContent->writeTerminator(writingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (outAddedNewEntry) {
|
if (outAddedNewEntry) {
|
||||||
|
@ -84,32 +90,37 @@ bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTarget
|
||||||
int tailEntryPos = NOT_A_DICT_POS;
|
int tailEntryPos = NOT_A_DICT_POS;
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
||||||
&tailEntryPos);
|
&tailEntryPos);
|
||||||
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
|
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
||||||
// Case 4, 5.
|
// Case 4, 5. Add new entry to the bigram list.
|
||||||
// Add new entry to the bigram list.
|
const int contentTailPos = mBigramDictContent->getContentTailPos();
|
||||||
if (tailEntryPos == NOT_A_DICT_POS) {
|
// If the tail entry is at the tail of content buffer, the new entry can be written without
|
||||||
// Case 4. Create new bigram list.
|
// link (Case 5).
|
||||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
const bool canAppendEntry =
|
||||||
return false;
|
contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
|
||||||
}
|
const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
|
||||||
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
int writingPos = newEntryPos;
|
||||||
// Copy existing bigram list.
|
|
||||||
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Write new entry at the tail position of the bigram content.
|
// Write new entry at the tail position of the bigram content.
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
newTargetTerminalId);
|
newTargetTerminalId);
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||||
&newBigramEntry, bigramProperty);
|
&newBigramEntry, bigramProperty);
|
||||||
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
|
||||||
|
&writingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Update has next flag of the tail entry.
|
if (!mBigramDictContent->writeTerminator(writingPos)) {
|
||||||
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
|
AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (!canAppendEntry) {
|
||||||
|
// Update link of the current tail entry.
|
||||||
|
if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
|
||||||
|
AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
|
||||||
|
tailEntryPos, newEntryPos);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (outAddedNewEntry) {
|
if (outAddedNewEntry) {
|
||||||
*outAddedNewEntry = true;
|
*outAddedNewEntry = true;
|
||||||
}
|
}
|
||||||
|
@ -228,14 +239,18 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
if (outTailEntryPos) {
|
if (outTailEntryPos) {
|
||||||
*outTailEntryPos = NOT_A_DICT_POS;
|
*outTailEntryPos = NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
bool hasNext = true;
|
|
||||||
int invalidEntryPos = NOT_A_DICT_POS;
|
int invalidEntryPos = NOT_A_DICT_POS;
|
||||||
int readingPos = bigramListPos;
|
int readingPos = bigramListPos;
|
||||||
while (hasNext) {
|
while (true) {
|
||||||
const int entryPos = readingPos;
|
|
||||||
const BigramEntry bigramEntry =
|
const BigramEntry bigramEntry =
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||||
hasNext = bigramEntry.hasNext();
|
const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
|
||||||
|
if (!bigramEntry.hasNext()) {
|
||||||
|
if (outTailEntryPos) {
|
||||||
|
*outTailEntryPos = entryPos;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
||||||
// Entry with same target is found.
|
// Entry with same target is found.
|
||||||
return entryPos;
|
return entryPos;
|
||||||
|
@ -243,11 +258,6 @@ int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||||
// Invalid entry that can be reused is found.
|
// Invalid entry that can be reused is found.
|
||||||
invalidEntryPos = entryPos;
|
invalidEntryPos = entryPos;
|
||||||
}
|
}
|
||||||
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
|
|
||||||
if (outTailEntryPos) {
|
|
||||||
*outTailEntryPos = entryPos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return invalidEntryPos;
|
return invalidEntryPos;
|
||||||
}
|
}
|
||||||
|
@ -269,10 +279,4 @@ const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
|
|
||||||
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
|
|
||||||
const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
|
|
||||||
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -63,8 +63,6 @@ class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
||||||
const BigramProperty *const bigramProperty) const;
|
const BigramProperty *const bigramProperty) const;
|
||||||
|
|
||||||
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
|
|
||||||
|
|
||||||
BigramDictContent *const mBigramDictContent;
|
BigramDictContent *const mBigramDictContent;
|
||||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||||
const HeaderPolicy *const mHeaderPolicy;
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
|
||||||
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
int *const bigramEntryPos) const {
|
int *const bigramEntryPos) const {
|
||||||
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
|
||||||
|
@ -34,7 +36,7 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
}
|
}
|
||||||
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
|
||||||
const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
|
const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
|
||||||
int probability = NOT_A_PROBABILITY;
|
int probability = NOT_A_PROBABILITY;
|
||||||
int timestamp = NOT_A_TIMESTAMP;
|
int timestamp = NOT_A_TIMESTAMP;
|
||||||
int level = 0;
|
int level = 0;
|
||||||
|
@ -55,81 +57,90 @@ const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
|
||||||
const int targetTerminalId =
|
const int targetTerminalId =
|
||||||
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
|
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
|
||||||
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
|
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
|
||||||
|
if (isLink) {
|
||||||
|
const int linkedEntryPos = targetTerminalId;
|
||||||
|
if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
|
||||||
|
// Bigram list terminator is found.
|
||||||
|
return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||||
|
Ver4DictConstants::NOT_A_TERMINAL_ID);
|
||||||
|
}
|
||||||
|
*bigramEntryPos = linkedEntryPos;
|
||||||
|
return getBigramEntryAndAdvancePosition(bigramEntryPos);
|
||||||
|
}
|
||||||
|
// hasNext is always true because we should continue to read the next entry until the terminator
|
||||||
|
// is found.
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
const HistoricalInfo historicalInfo(timestamp, level, count);
|
const HistoricalInfo historicalInfo(timestamp, level, count);
|
||||||
return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
|
return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
|
||||||
} else {
|
} else {
|
||||||
return BigramEntry(hasNext, probability, targetTerminalId);
|
return BigramEntry(true /* hasNext */, probability, targetTerminalId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||||
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
||||||
|
return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
|
||||||
|
bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
|
||||||
|
bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
|
||||||
|
bigramEntryToWrite->getHistoricalInfo()->getLevel(),
|
||||||
|
bigramEntryToWrite->getHistoricalInfo()->getCount(),
|
||||||
|
entryWritingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
|
||||||
|
const bool isLink, const int probability, const int targetTerminalId,
|
||||||
|
const int timestamp, const int level, const int count, int *const entryWritingPos) {
|
||||||
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
||||||
const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
|
const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
||||||
historicalInfo->getTimeStamp());
|
timestamp);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(level,
|
||||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
|
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
|
||||||
historicalInfo->getLevel());
|
level);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(count,
|
||||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
|
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
|
||||||
historicalInfo->getCount());
|
count);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
|
if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
|
||||||
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
||||||
bigramEntryToWrite->getProbability());
|
probability);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const int targetTerminalIdToWrite =
|
const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||||
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
|
||||||
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
|
|
||||||
bigramEntryToWrite->getTargetTerminalId();
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
|
||||||
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
|
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
|
||||||
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
|
*entryWritingPos, targetTerminalId);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
|
bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
|
||||||
int *const outTailEntryPos) {
|
const int targetTerminalId = linkedEntryPos;
|
||||||
int readingPos = bigramListPos;
|
int pos = writingPos;
|
||||||
int writingPos = toPos;
|
return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
|
||||||
bool hasNext = true;
|
NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
|
||||||
while (hasNext) {
|
0 /* count */, &pos);
|
||||||
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
hasNext = bigramEntry.hasNext();
|
|
||||||
if (!hasNext) {
|
|
||||||
*outTailEntryPos = writingPos;
|
|
||||||
}
|
|
||||||
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
|
@ -171,16 +182,15 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
|
||||||
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
int *const outEntrycount) {
|
int *const outEntryCount) {
|
||||||
bool hasNext = true;
|
bool hasNext = true;
|
||||||
int readingPos = bigramListPos;
|
int readingPos = bigramListPos;
|
||||||
int writingPos = toPos;
|
int writingPos = toPos;
|
||||||
int lastEntryPos = NOT_A_DICT_POS;
|
|
||||||
while (hasNext) {
|
while (hasNext) {
|
||||||
const BigramEntry originalBigramEntry =
|
const BigramEntry originalBigramEntry =
|
||||||
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||||
hasNext = originalBigramEntry.hasNext();
|
hasNext = originalBigramEntry.hasNext();
|
||||||
if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
if (!originalBigramEntry.isValid()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
||||||
|
@ -189,21 +199,17 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
// Target word has been removed.
|
// Target word has been removed.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
|
|
||||||
const BigramEntry updatedBigramEntry =
|
const BigramEntry updatedBigramEntry =
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
|
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
|
||||||
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
|
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
|
||||||
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*outEntrycount += 1;
|
*outEntryCount += 1;
|
||||||
}
|
}
|
||||||
if (lastEntryPos != NOT_A_DICT_POS) {
|
if (*outEntryCount > 0) {
|
||||||
// Update has next flag in the last written entry.
|
if (!writeTerminator(writingPos)) {
|
||||||
const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
|
AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
|
||||||
false /* hasNext */);
|
|
||||||
if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,10 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||||
|
|
||||||
|
int getContentTailPos() const {
|
||||||
|
return getContentBuffer()->getTailPosition();
|
||||||
|
}
|
||||||
|
|
||||||
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
|
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
|
||||||
int readingPos = bigramEntryPos;
|
int readingPos = bigramEntryPos;
|
||||||
return getBigramEntryAndAdvancePosition(&readingPos);
|
return getBigramEntryAndAdvancePosition(&readingPos);
|
||||||
|
@ -71,13 +75,18 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
|
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
|
||||||
int *const entryWritingPos);
|
int *const entryWritingPos);
|
||||||
|
|
||||||
|
bool writeTerminator(const int writingPos) {
|
||||||
|
// Terminator is a link to the invalid position.
|
||||||
|
return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool writeLink(const int linkedPos, const int writingPos);
|
||||||
|
|
||||||
bool createNewBigramList(const int terminalId) {
|
bool createNewBigramList(const int terminalId) {
|
||||||
const int bigramListPos = getContentBuffer()->getTailPosition();
|
const int bigramListPos = getContentBuffer()->getTailPosition();
|
||||||
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
|
|
||||||
|
|
||||||
bool flushToFile(const char *const dictPath) const {
|
bool flushToFile(const char *const dictPath) const {
|
||||||
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||||
|
@ -88,17 +97,6 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
const BigramDictContent *const originalBigramDictContent,
|
const BigramDictContent *const originalBigramDictContent,
|
||||||
int *const outBigramEntryCount);
|
int *const outBigramEntryCount);
|
||||||
|
|
||||||
bool isContentTailPos(const int pos) const {
|
|
||||||
return pos == getContentBuffer()->getTailPosition();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
|
||||||
|
|
||||||
int createAndGetBigramFlags(const bool hasNext) const {
|
|
||||||
return hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getBigramEntrySize() const {
|
int getBigramEntrySize() const {
|
||||||
if (mHasHistoricalInfo) {
|
if (mHasHistoricalInfo) {
|
||||||
return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
|
return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
|
||||||
|
@ -113,6 +111,15 @@ class BigramDictContent : public SparseTableDictContent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
||||||
|
|
||||||
|
static const int INVALID_LINKED_ENTRY_POS;
|
||||||
|
|
||||||
|
bool writeBigramEntryAttributesAndAdvancePosition(
|
||||||
|
const bool isLink, const int probability, const int targetTerminalId,
|
||||||
|
const int timestamp, const int level, const int count, int *const entryWritingPos);
|
||||||
|
|
||||||
bool runGCBigramList(const int bigramListPos,
|
bool runGCBigramList(const int bigramListPos,
|
||||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||||
|
|
|
@ -60,7 +60,7 @@ const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
|
||||||
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
|
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
|
||||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||||
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
||||||
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
|
const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
|
||||||
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
|
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
|
||||||
|
|
||||||
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||||
|
|
|
@ -57,8 +57,8 @@ class Ver4DictConstants {
|
||||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||||
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
|
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
|
||||||
|
static const int BIGRAM_IS_LINK_MASK;
|
||||||
static const int BIGRAM_PROBABILITY_MASK;
|
static const int BIGRAM_PROBABILITY_MASK;
|
||||||
static const int BIGRAM_HAS_NEXT_MASK;
|
|
||||||
// Used when bigram list has time stamp.
|
// Used when bigram list has time stamp.
|
||||||
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
|
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue