/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
 * Do not edit this file other than updating policy's interface.
 *
 * This file was generated from
 *   dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
 */

#include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"

#include "dictionary/header/header_policy.h"
#include "dictionary/property/ngram_property.h"
#include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
#include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
#include "dictionary/structure/backward/v402/ver4_dict_constants.h"
#include "dictionary/utils/forgetting_curve_utils.h"

namespace latinime {
namespace backward {
namespace v402 {

/**
 * Reads the bigram entry at *bigramEntryPos and advances that position past it.
 *
 * Every out parameter except bigramEntryPos is optional; a null pointer is simply skipped.
 *
 * @param outBigramPos    receives the PtNode position looked up from the entry's target
 *                        terminal id.
 * @param outProbability  receives the probability decoded from the historical info when the
 *                        entry carries historical info, otherwise the entry's own probability.
 * @param outHasNext      receives the entry's has-next flag.
 * @param bigramEntryPos  in/out reading position handed to the bigram content.
 */
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
        bool *const outHasNext, int *const bigramEntryPos) const {
    const BigramEntry entry =
            mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
    if (outBigramPos) {
        // Translate the target terminal id into a PtNode position.
        *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
                entry.getTargetTerminalId());
    }
    if (outProbability) {
        if (!entry.hasHistoricalInfo()) {
            *outProbability = entry.getProbability();
        } else {
            *outProbability = ForgettingCurveUtils::decodeProbability(
                    entry.getHistoricalInfo(), mHeaderPolicy);
        }
    }
    if (outHasNext) {
        *outHasNext = entry.hasNext();
    }
}

/**
 * Adds a bigram (terminalId -> newTargetTerminalId) or updates the existing one.
 *
 * The situations handled are:
 *   1. The word has no bigrams yet.
 *   2. The word has bigrams, and there is the target in the list.
 *   3. The word has bigrams, and there is an invalid entry that can be reclaimed.
 *   4. The word has bigrams. We have to append new bigram entry to the list.
 *   5. Same as 4, but the list is the last entry of the content file.
 *
 * @param outAddedNewEntry  optional; reset to false on entry, then set to true when a new
 *                          entry is written (cases 1, 4, 5) or an invalid entry is reused
 *                          (case 3). In case 3 the flag is raised before the final write.
 * @return false as soon as any content operation fails, otherwise the result of the last write
 *         (or true for cases 1, 4, 5).
 */
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
        const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
    // Small local helper so the optional out flag is handled in one place.
    const auto reportAdded = [outAddedNewEntry](const bool added) {
        if (outAddedNewEntry) {
            *outAddedNewEntry = added;
        }
    };
    reportAdded(false);

    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (listHeadPos == NOT_A_DICT_POS) {
        // Case 1. No bigram list for this PtNode yet: create one and write its first entry.
        if (!mBigramDictContent->createNewBigramList(terminalId)) {
            return false;
        }
        const BigramEntry blankEntry(false /* hasNext */, NOT_A_PROBABILITY,
                newTargetTerminalId);
        const BigramEntry firstEntry = createUpdatedBigramEntryFrom(&blankEntry, ngramProperty);
        // The head position is re-read because the list has just been created.
        const int freshHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
        if (!mBigramDictContent->writeBigramEntry(&firstEntry, freshHeadPos)) {
            return false;
        }
        reportAdded(true);
        return true;
    }

    int tailEntryPos = NOT_A_DICT_POS;
    const int reusableEntryPos = getEntryPosToUpdate(newTargetTerminalId, listHeadPos,
            &tailEntryPos);

    const bool canOverwriteInPlace =
            (tailEntryPos == NOT_A_DICT_POS) && (reusableEntryPos != NOT_A_DICT_POS);
    if (canOverwriteInPlace) {
        // Case 2. Overwrite the existing entry. Case 3. Reclaim the existing invalid entry.
        const BigramEntry currentEntry = mBigramDictContent->getBigramEntry(reusableEntryPos);
        if (!currentEntry.isValid()) {
            // Case 3. Reusing an invalid slot counts as adding a new entry; updating a valid
            // entry leaves the flag false.
            reportAdded(true);
        }
        const BigramEntry retargetedEntry =
                currentEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
        const BigramEntry entryToStore = createUpdatedBigramEntryFrom(&retargetedEntry,
                ngramProperty);
        return mBigramDictContent->writeBigramEntry(&entryToStore, reusableEntryPos);
    }

    // Case 4, 5. A new entry has to be appended to the bigram list.
    if (tailEntryPos == NOT_A_DICT_POS) {
        // Case 4. Create a new bigram list and copy the existing one into it; the copy
        // reports the position of its tail entry through tailEntryPos.
        if (!mBigramDictContent->createNewBigramList(terminalId)) {
            return false;
        }
        const int copyDestPos = mBigramDictContent->getBigramListHeadPos(terminalId);
        if (!mBigramDictContent->copyBigramList(listHeadPos, copyDestPos, &tailEntryPos)) {
            return false;
        }
    }
    // Write the new entry at the tail position of the bigram content.
    const BigramEntry blankEntry(false /* hasNext */, NOT_A_PROBABILITY, newTargetTerminalId);
    const BigramEntry appendedEntry = createUpdatedBigramEntryFrom(&blankEntry, ngramProperty);
    if (!mBigramDictContent->writeBigramEntryAtTail(&appendedEntry)) {
        return false;
    }
    // The previous tail entry now has a successor.
    if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
        return false;
    }
    reportAdded(true);
    return true;
}

/**
 * Removes the bigram (terminalId -> targetTerminalId) by overwriting its entry with an
 * invalidated copy.
 *
 * @return false when the word has no bigram list or no entry with that target is found;
 *         otherwise the result of writing the invalidated entry.
 */
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (listHeadPos == NOT_A_DICT_POS) {
        // Bigram list doesn't exist.
        return false;
    }
    const int candidatePos = getEntryPosToUpdate(targetTerminalId, listHeadPos,
            nullptr /* outTailEntryPos */);
    if (candidatePos == NOT_A_DICT_POS) {
        // Bigram entry doesn't exist.
        return false;
    }
    const BigramEntry candidate = mBigramDictContent->getBigramEntry(candidatePos);
    // getEntryPosToUpdate() may also hand back a reusable invalid entry, so the target has to
    // be confirmed before anything is overwritten.
    if (candidate.getTargetTerminalId() != targetTerminalId) {
        // Bigram entry doesn't exist.
        return false;
    }
    // Mark the entry as invalid and overwrite the original with it.
    const BigramEntry invalidated = candidate.getInvalidatedEntry();
    return mBigramDictContent->writeBigramEntry(&invalidated, candidatePos);
}

/**
 * Walks the bigram list of terminalId, refreshing entries that are kept and invalidating the
 * ones that are not.
 *
 * For each valid entry:
 *   - the entry is invalidated when its target terminal no longer maps to a PtNode position;
 *   - an entry with historical info is rewritten with the info to save when
 *     ForgettingCurveUtils::needsToKeep() accepts it, and invalidated otherwise;
 *   - an entry without historical info is left untouched.
 * Entries that are already invalid are skipped.
 *
 * @param outBigramCount  incremented once per entry that is kept; dereferenced without a null
 *                        check.
 * @return false if any write fails, true otherwise (including when no list exists).
 */
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
        int *const outBigramCount) {
    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (listHeadPos == NOT_A_DICT_POS) {
        // Bigram list doesn't exist.
        return true;
    }
    int cursor = listHeadPos;
    for (bool moreEntries = true; moreEntries; ) {
        const int currentPos = cursor;
        const BigramEntry entry =
                mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
        moreEntries = entry.hasNext();
        if (!entry.isValid()) {
            continue;
        }
        const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
                entry.getTargetTerminalId());
        if (targetPtNodePos != NOT_A_DICT_POS) {
            if (!entry.hasHistoricalInfo()) {
                // Nothing to refresh; the entry is kept as is.
                *outBigramCount += 1;
                continue;
            }
            const HistoricalInfo historicalInfo =
                    ForgettingCurveUtils::createHistoricalInfoToSave(
                            entry.getHistoricalInfo(), mHeaderPolicy);
            if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
                const BigramEntry refreshed =
                        entry.updateHistoricalInfoAndGetEntry(&historicalInfo);
                if (!mBigramDictContent->writeBigramEntry(&refreshed, currentPos)) {
                    return false;
                }
                *outBigramCount += 1;
                continue;
            }
        }
        // Reached when the target is gone or the historical info is not worth keeping:
        // invalidate the entry in place.
        const BigramEntry invalidated = entry.getInvalidatedEntry();
        if (!mBigramDictContent->writeBigramEntry(&invalidated, currentPos)) {
            return false;
        }
    }
    return true;
}

/**
 * Counts the valid entries in the bigram list of terminalId.
 *
 * @return 0 when the word has no bigram list, otherwise the number of entries for which
 *         isValid() holds.
 */
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
    const int listHeadPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    if (listHeadPos == NOT_A_DICT_POS) {
        // Bigram list doesn't exist.
        return 0;
    }
    int validEntryCount = 0;
    int cursor = listHeadPos;
    for (bool moreEntries = true; moreEntries; ) {
        const BigramEntry entry =
                mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
        moreEntries = entry.hasNext();
        if (entry.isValid()) {
            ++validEntryCount;
        }
    }
    return validEntryCount;
}

/**
 * Scans the bigram list starting at bigramListPos for an entry to overwrite.
 *
 * @param targetTerminalIdToFind  target terminal id being searched for.
 * @param bigramListPos           position of the first entry of the list.
 * @param outTailEntryPos         optional; initialised to NOT_A_DICT_POS, and set to the
 *                                position of the last entry when the whole list was scanned
 *                                without a match and the list ends at the content tail.
 * @return the position of the entry whose target equals targetTerminalIdToFind; if there is
 *         none, the position of the last invalid entry seen, or NOT_A_DICT_POS.
 */
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
        const int bigramListPos, int *const outTailEntryPos) const {
    if (outTailEntryPos) {
        *outTailEntryPos = NOT_A_DICT_POS;
    }
    int reusableInvalidPos = NOT_A_DICT_POS;
    int cursor = bigramListPos;
    for (bool moreEntries = true; moreEntries; ) {
        const int currentPos = cursor;
        const BigramEntry entry =
                mBigramDictContent->getBigramEntryAndAdvancePosition(&cursor);
        moreEntries = entry.hasNext();
        if (entry.getTargetTerminalId() == targetTerminalIdToFind) {
            // Entry with same target is found.
            return currentPos;
        }
        if (!entry.isValid()) {
            // Invalid entry that can be reused is found.
            reusableInvalidPos = currentPos;
        }
        // The tail check is evaluated for the last entry even when the caller passed no out
        // pointer; only the store is conditional on it.
        if (!moreEntries && mBigramDictContent->isContentTailPos(cursor) && outTailEntryPos) {
            *outTailEntryPos = currentPos;
        }
    }
    return reusableInvalidPos;
}

/**
 * Builds the entry to be written from originalBigramEntry and the values in ngramProperty.
 *
 * When the header policy reports historical info for words, the entry's historical info is
 * replaced by the one produced by ForgettingCurveUtils::createUpdatedHistoricalInfo();
 * otherwise only the probability is replaced by ngramProperty's probability.
 */
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
        const BigramEntry *const originalBigramEntry,
        const NgramProperty *const ngramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        return originalBigramEntry->updateProbabilityAndGetEntry(
                ngramProperty->getProbability());
    }
    const HistoricalInfo &incomingHistoricalInfo = ngramProperty->getHistoricalInfo();
    const HistoricalInfo mergedHistoricalInfo =
            ForgettingCurveUtils::createUpdatedHistoricalInfo(
                    originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
                    &incomingHistoricalInfo, mHeaderPolicy);
    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedHistoricalInfo);
}

/**
 * Rewrites the entry at bigramEntryPos with its has-next flag set to hasNext.
 *
 * @return the result of writing the modified entry back to the same position.
 */
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
    const BigramEntry currentEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
    const BigramEntry relinkedEntry = currentEntry.updateHasNextAndGetEntry(hasNext);
    return mBigramDictContent->writeBigramEntry(&relinkedEntry, bigramEntryPos);
}

} // namespace v402
} // namespace backward
} // namespace latinime