LatinIME/native/jni/src/dictionary/structure/backward/v402/content/bigram_dict_content.cpp

227 lines
10 KiB
C++

/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* dictionary/structure/v4/content/bigram_dict_content.cpp
*/
#include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
#include "dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v402 {
// Decodes the bigram entry located at *bigramEntryPos in the content buffer and advances
// *bigramEntryPos past every field that is read. If the position is negative or the entry would
// run past the buffer tail, the error is logged (and asserted) and an entry with no next entry,
// no probability and no target terminal is returned without reading anything.
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
        int *const bigramEntryPos) const {
    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
    const int entryHeadPos = *bigramEntryPos;
    const int entryTailPos = entryHeadPos + getBigramEntrySize();
    const bool isReadable = entryHeadPos >= 0 && entryTailPos <= buffer->getTailPosition();
    if (!isReadable) {
        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
                "bufSize: %d", entryHeadPos, entryTailPos, buffer->getTailPosition());
        ASSERT(false);
        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                Ver4DictConstants::NOT_A_TERMINAL_ID);
    }

    // Field order in the buffer: flags, then (timestamp, level, count) or probability,
    // then the target terminal id.
    const int flags = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
    const bool hasNext = (flags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;

    int probability = NOT_A_PROBABILITY;
    int timestamp = NOT_A_TIMESTAMP;
    int level = 0;
    int count = 0;
    if (!mHasHistoricalInfo) {
        probability = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
    } else {
        timestamp = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
        level = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
        count = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
    }

    // The stored "invalid target" marker is translated to NOT_A_TERMINAL_ID for callers.
    const int storedTargetTerminalId = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
    int targetTerminalId = storedTargetTerminalId;
    if (storedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    }

    if (!mHasHistoricalInfo) {
        return BigramEntry(hasNext, probability, targetTerminalId);
    }
    // Hack for better migration.
    const HistoricalInfo historicalInfo(timestamp, level, count + level);
    return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
}
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
const int bigramFlags = createAndGetBigramFlags(bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimestamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
historicalInfo->getTimestamp());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
historicalInfo->getLevel());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
historicalInfo->getCount());
return false;
}
} else {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
}
const int targetTerminalIdToWrite =
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
bigramEntryToWrite->getTargetTerminalId();
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
return false;
}
return true;
}
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
int *const outTailEntryPos) {
int readingPos = bigramListPos;
int writingPos = toPos;
bool hasNext = true;
while (hasNext) {
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!hasNext) {
*outTailEntryPos = writingPos;
}
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
}
}
return true;
}
// Rebuilds this content from originalBigramDictContent. For every (key, value) pair in
// terminalIdMap, the bigram list that the original content holds for the key is garbage
// collected into the tail of this content, and the resulting list position is registered in the
// lookup table under the value. Lists that are absent or end up empty are skipped.
// *outBigramEntryCount is increased by the number of entries kept; it is not reset here.
// Returns false if a list cannot be processed or its position cannot be stored.
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
        const BigramDictContent *const originalBigramDictContent,
        int *const outBigramEntryCount) {
    typedef TerminalPositionLookupTable::TerminalIdMap::const_iterator TerminalIdMapIterator;
    const TerminalIdMapIterator mapEnd = terminalIdMap->end();
    for (TerminalIdMapIterator mapping = terminalIdMap->begin(); mapping != mapEnd; ++mapping) {
        const int sourceListPos =
                originalBigramDictContent->getBigramListHeadPos(mapping->first);
        if (sourceListPos == NOT_A_DICT_POS) {
            // This terminal does not have a bigram list.
            continue;
        }
        // The surviving entries are appended at the current tail of this content.
        const int destinationListPos = getContentBuffer()->getTailPosition();
        int keptEntryCount = 0;
        const bool listProcessed = runGCBigramList(sourceListPos, originalBigramDictContent,
                destinationListPos, terminalIdMap, &keptEntryCount);
        if (!listProcessed) {
            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
                    sourceListPos, destinationListPos);
            return false;
        }
        // When every entry was useless, the terminal simply has no bigram list.
        if (keptEntryCount != 0) {
            *outBigramEntryCount += keptEntryCount;
            // Set bigram list position to the lookup table.
            if (!getUpdatableAddressLookupTable()->set(mapping->second, destinationListPos)) {
                AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
                        mapping->second, destinationListPos);
                return false;
            }
        }
    }
    return true;
}
// Copies the bigram list starting at bigramListPos in sourceBigramDictContent into this content
// at toPos, keeping only entries whose target terminal id is valid and present in terminalIdMap.
// Kept entries are written with their target replaced by the id terminalIdMap maps it to, and
// *outEntrycount is incremented once per kept entry (it is not reset here).
// Returns whether GC for the bigram list succeeded.
bool BigramDictContent::runGCBigramList(const int bigramListPos,
        const BigramDictContent *const sourceBigramDictContent, const int toPos,
        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
        int *const outEntrycount) {
    bool hasNext = true;
    int readingPos = bigramListPos;
    int writingPos = toPos;
    // Position of the last written entry, but only while that entry still carries a hasNext
    // flag that may turn out to be stale; NOT_A_DICT_POS when no fix-up is needed.
    int lastEntryPos = NOT_A_DICT_POS;
    while (hasNext) {
        const BigramEntry originalBigramEntry =
                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
        hasNext = originalBigramEntry.hasNext();
        if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
            // Entry has no target; drop it.
            continue;
        }
        TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
                terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
        if (it == terminalIdMap->end()) {
            // Target word has been removed.
            continue;
        }
        // If this entry claims a successor, remember it: every following entry may be dropped,
        // in which case its hasNext flag has to be cleared after the loop.
        lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
        const BigramEntry updatedBigramEntry =
                originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
        if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
            AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
            return false;
        }
        *outEntrycount += 1;
    }
    if (lastEntryPos != NOT_A_DICT_POS) {
        // Update has next flag in the last written entry.
        const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
                false /* hasNext */);
        if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
            // Report the position of the entry being rewritten, not the list tail.
            AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d",
                    lastEntryPos);
            return false;
        }
    }
    return true;
}
} // namespace v402
} // namespace backward
} // namespace latinime