// LatinIME/native/jni/src/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.cpp
//
// 290 lines
// 12 KiB
// C++

/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
*/
#include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
#include "dictionary/header/header_policy.h"
#include "dictionary/property/ngram_property.h"
#include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
#include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
#include "dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
namespace v402 {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
if (outBigramPos) {
// Lookup target PtNode position.
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
if (bigramEntry.hasHistoricalInfo()) {
*outProbability =
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
mHeaderPolicy);
} else {
*outProbability = bigramEntry.getProbability();
}
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
}
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
// 4. The word has bigrams. We have to append new bigram entry to the list.
// 5. Same as 4, but the list is the last entry of the content file.
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Case 1. PtNode that doesn't have a bigram list.
// Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
ngramProperty);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
}
int tailEntryPos = NOT_A_DICT_POS;
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
&tailEntryPos);
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
// Case 4, 5.
// Add new entry to the bigram list.
if (tailEntryPos == NOT_A_DICT_POS) {
// Case 4. Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
// Copy existing bigram list.
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
return false;
}
}
// Write new entry at the tail position of the bigram content.
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, ngramProperty);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false;
}
// Update has next flag of the tail entry.
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
}
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (!originalBigramEntry.isValid()) {
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
// entry is updated.
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
}
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, ngramProperty);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return false;
}
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
nullptr /* outTailEntryPos */);
if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist.
return false;
}
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
// Bigram entry doesn't exist.
return false;
}
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
}
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
int *const outBigramCount) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return true;
}
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!bigramEntry.isValid()) {
continue;
}
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
} else if (bigramEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
const BigramEntry updatedBigramEntry =
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
*outBigramCount += 1;
} else {
// Remove entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
}
} else {
*outBigramCount += 1;
}
}
return true;
}
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return 0;
}
int bigramCount = 0;
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.isValid()) {
bigramCount++;
}
}
return bigramCount;
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos, int *const outTailEntryPos) const {
if (outTailEntryPos) {
*outTailEntryPos = NOT_A_DICT_POS;
}
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found.
return entryPos;
} else if (!bigramEntry.isValid()) {
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
if (outTailEntryPos) {
*outTailEntryPos = entryPos;
}
}
}
return invalidEntryPos;
}
// Builds a copy of *originalBigramEntry updated with the information in *ngramProperty. When the
// header policy reports historical info of words, the entry's historical info is updated;
// otherwise only its probability is replaced.
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
        const BigramEntry *const originalBigramEntry,
        const NgramProperty *const ngramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        // No historical info in this dictionary: store the plain probability.
        return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
    }
    // Combine the entry's current historical info with the incoming property.
    const HistoricalInfo &incomingHistoricalInfo = ngramProperty->getHistoricalInfo();
    const HistoricalInfo mergedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
            originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
            &incomingHistoricalInfo, mHeaderPolicy);
    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedHistoricalInfo);
}
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
}
} // namespace v402
} // namespace backward
} // namespace latinime