290 lines
12 KiB
C++
290 lines
12 KiB
C++
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
/*
 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
 * Do not edit this file other than updating policy's interface.
 *
 * This file was generated from
 *   dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
 */
|
|
|
|
#include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
|
|
|
|
#include "dictionary/header/header_policy.h"
|
|
#include "dictionary/property/ngram_property.h"
|
|
#include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
|
|
#include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
|
|
#include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
|
|
#include "dictionary/structure/backward/v402/ver4_dict_constants.h"
|
|
#include "dictionary/utils/forgetting_curve_utils.h"
|
|
|
|
namespace latinime {
|
|
namespace backward {
|
|
namespace v402 {
|
|
|
|
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
|
bool *const outHasNext, int *const bigramEntryPos) const {
|
|
const BigramEntry bigramEntry =
|
|
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
|
|
if (outBigramPos) {
|
|
// Lookup target PtNode position.
|
|
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
|
bigramEntry.getTargetTerminalId());
|
|
}
|
|
if (outProbability) {
|
|
if (bigramEntry.hasHistoricalInfo()) {
|
|
*outProbability =
|
|
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
|
|
mHeaderPolicy);
|
|
} else {
|
|
*outProbability = bigramEntry.getProbability();
|
|
}
|
|
}
|
|
if (outHasNext) {
|
|
*outHasNext = bigramEntry.hasNext();
|
|
}
|
|
}
|
|
|
|
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
|
const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
|
|
// 1. The word has no bigrams yet.
|
|
// 2. The word has bigrams, and there is the target in the list.
|
|
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
|
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
|
// 5. Same as 4, but the list is the last entry of the content file.
|
|
if (outAddedNewEntry) {
|
|
*outAddedNewEntry = false;
|
|
}
|
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
if (bigramListPos == NOT_A_DICT_POS) {
|
|
// Case 1. PtNode that doesn't have a bigram list.
|
|
// Create new bigram list.
|
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
|
return false;
|
|
}
|
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
|
newTargetTerminalId);
|
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
|
ngramProperty);
|
|
// Write an entry.
|
|
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
|
return false;
|
|
}
|
|
if (outAddedNewEntry) {
|
|
*outAddedNewEntry = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int tailEntryPos = NOT_A_DICT_POS;
|
|
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
|
&tailEntryPos);
|
|
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
|
|
// Case 4, 5.
|
|
// Add new entry to the bigram list.
|
|
if (tailEntryPos == NOT_A_DICT_POS) {
|
|
// Case 4. Create new bigram list.
|
|
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
|
return false;
|
|
}
|
|
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
// Copy existing bigram list.
|
|
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
|
|
return false;
|
|
}
|
|
}
|
|
// Write new entry at the tail position of the bigram content.
|
|
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
|
newTargetTerminalId);
|
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
|
&newBigramEntry, ngramProperty);
|
|
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
|
return false;
|
|
}
|
|
// Update has next flag of the tail entry.
|
|
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
|
|
return false;
|
|
}
|
|
if (outAddedNewEntry) {
|
|
*outAddedNewEntry = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
|
|
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
|
if (!originalBigramEntry.isValid()) {
|
|
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
|
|
// entry is updated.
|
|
if (outAddedNewEntry) {
|
|
*outAddedNewEntry = true;
|
|
}
|
|
}
|
|
const BigramEntry updatedBigramEntry =
|
|
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
|
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
|
&updatedBigramEntry, ngramProperty);
|
|
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
|
}
|
|
|
|
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
if (bigramListPos == NOT_A_DICT_POS) {
|
|
// Bigram list doesn't exist.
|
|
return false;
|
|
}
|
|
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
|
|
nullptr /* outTailEntryPos */);
|
|
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
|
// Bigram entry doesn't exist.
|
|
return false;
|
|
}
|
|
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
|
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
|
|
// Bigram entry doesn't exist.
|
|
return false;
|
|
}
|
|
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
|
|
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
|
|
}
|
|
|
|
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
|
int *const outBigramCount) {
|
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
if (bigramListPos == NOT_A_DICT_POS) {
|
|
// Bigram list doesn't exist.
|
|
return true;
|
|
}
|
|
bool hasNext = true;
|
|
int readingPos = bigramListPos;
|
|
while (hasNext) {
|
|
const int entryPos = readingPos;
|
|
const BigramEntry bigramEntry =
|
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
hasNext = bigramEntry.hasNext();
|
|
if (!bigramEntry.isValid()) {
|
|
continue;
|
|
}
|
|
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
|
bigramEntry.getTargetTerminalId());
|
|
if (targetPtNodePos == NOT_A_DICT_POS) {
|
|
// Invalidate bigram entry.
|
|
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
return false;
|
|
}
|
|
} else if (bigramEntry.hasHistoricalInfo()) {
|
|
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
|
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
|
|
if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
|
|
const BigramEntry updatedBigramEntry =
|
|
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
|
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
return false;
|
|
}
|
|
*outBigramCount += 1;
|
|
} else {
|
|
// Remove entry.
|
|
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
return false;
|
|
}
|
|
}
|
|
} else {
|
|
*outBigramCount += 1;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
|
|
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
if (bigramListPos == NOT_A_DICT_POS) {
|
|
// Bigram list doesn't exist.
|
|
return 0;
|
|
}
|
|
int bigramCount = 0;
|
|
bool hasNext = true;
|
|
int readingPos = bigramListPos;
|
|
while (hasNext) {
|
|
const BigramEntry bigramEntry =
|
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
hasNext = bigramEntry.hasNext();
|
|
if (bigramEntry.isValid()) {
|
|
bigramCount++;
|
|
}
|
|
}
|
|
return bigramCount;
|
|
}
|
|
|
|
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
|
const int bigramListPos, int *const outTailEntryPos) const {
|
|
if (outTailEntryPos) {
|
|
*outTailEntryPos = NOT_A_DICT_POS;
|
|
}
|
|
bool hasNext = true;
|
|
int invalidEntryPos = NOT_A_DICT_POS;
|
|
int readingPos = bigramListPos;
|
|
while (hasNext) {
|
|
const int entryPos = readingPos;
|
|
const BigramEntry bigramEntry =
|
|
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
hasNext = bigramEntry.hasNext();
|
|
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
|
// Entry with same target is found.
|
|
return entryPos;
|
|
} else if (!bigramEntry.isValid()) {
|
|
// Invalid entry that can be reused is found.
|
|
invalidEntryPos = entryPos;
|
|
}
|
|
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
|
|
if (outTailEntryPos) {
|
|
*outTailEntryPos = entryPos;
|
|
}
|
|
}
|
|
}
|
|
return invalidEntryPos;
|
|
}
|
|
|
|
// Builds the entry to store for originalBigramEntry after applying ngramProperty.
//
// When the header policy reports that the dictionary keeps historical info, the entry's
// historical info is combined with the property's via
// ForgettingCurveUtils::createUpdatedHistoricalInfo(); otherwise only the probability is
// replaced. The original entry is not modified; an updated copy is returned.
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
        const BigramEntry *const originalBigramEntry,
        const NgramProperty *const ngramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        // Probability-only dictionary.
        return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
    }

    const HistoricalInfo &incomingHistoricalInfo = ngramProperty->getHistoricalInfo();
    const HistoricalInfo mergedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
            originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
            &incomingHistoricalInfo, mHeaderPolicy);
    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedHistoricalInfo);
}
|
|
|
|
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
|
|
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
|
|
const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
|
|
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
|
|
}
|
|
|
|
} // namespace v402
|
|
} // namespace backward
|
|
} // namespace latinime
|