am fe395232: Remove bigram dict content.
* commit 'fe395232d69df0887863c1cbabe63def2586d29e': Remove bigram dict content.main
commit
17510158c4
|
@ -61,7 +61,6 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
ver2_patricia_trie_node_reader.cpp \
|
ver2_patricia_trie_node_reader.cpp \
|
||||||
ver2_pt_node_array_reader.cpp) \
|
ver2_pt_node_array_reader.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
bigram/ver4_bigram_list_policy.cpp \
|
|
||||||
ver4_dict_buffers.cpp \
|
ver4_dict_buffers.cpp \
|
||||||
ver4_dict_constants.cpp \
|
ver4_dict_constants.cpp \
|
||||||
ver4_patricia_trie_node_reader.cpp \
|
ver4_patricia_trie_node_reader.cpp \
|
||||||
|
@ -71,7 +70,6 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
ver4_patricia_trie_writing_helper.cpp \
|
ver4_patricia_trie_writing_helper.cpp \
|
||||||
ver4_pt_node_array_reader.cpp) \
|
ver4_pt_node_array_reader.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/content/, \
|
||||||
bigram_dict_content.cpp \
|
|
||||||
language_model_dict_content.cpp \
|
language_model_dict_content.cpp \
|
||||||
shortcut_dict_content.cpp \
|
shortcut_dict_content.cpp \
|
||||||
sparse_table_dict_content.cpp \
|
sparse_table_dict_content.cpp \
|
||||||
|
|
|
@ -1,282 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
|
|
||||||
|
|
||||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
|
||||||
const BigramEntry bigramEntry =
|
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
|
|
||||||
if (outBigramPos) {
|
|
||||||
// Lookup target PtNode position.
|
|
||||||
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
|
||||||
bigramEntry.getTargetTerminalId());
|
|
||||||
}
|
|
||||||
if (outProbability) {
|
|
||||||
if (bigramEntry.hasHistoricalInfo()) {
|
|
||||||
*outProbability =
|
|
||||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
|
|
||||||
mHeaderPolicy);
|
|
||||||
} else {
|
|
||||||
*outProbability = bigramEntry.getProbability();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (outHasNext) {
|
|
||||||
*outHasNext = bigramEntry.hasNext();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
|
||||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
|
|
||||||
// 1. The word has no bigrams yet.
|
|
||||||
// 2. The word has bigrams, and there is the target in the list.
|
|
||||||
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
|
||||||
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
|
||||||
// 5. Same as 4, but the list is the last entry of the content file.
|
|
||||||
if (outAddedNewEntry) {
|
|
||||||
*outAddedNewEntry = false;
|
|
||||||
}
|
|
||||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
|
||||||
// Case 1. PtNode that doesn't have a bigram list.
|
|
||||||
// Create new bigram list.
|
|
||||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
|
||||||
newTargetTerminalId);
|
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
|
||||||
bigramProperty);
|
|
||||||
// Write an entry.
|
|
||||||
int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
||||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
|
|
||||||
&writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!mBigramDictContent->writeTerminator(writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (outAddedNewEntry) {
|
|
||||||
*outAddedNewEntry = true;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int tailEntryPos = NOT_A_DICT_POS;
|
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
|
||||||
&tailEntryPos);
|
|
||||||
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
|
||||||
// Case 4, 5. Add new entry to the bigram list.
|
|
||||||
const int contentTailPos = mBigramDictContent->getContentTailPos();
|
|
||||||
// If the tail entry is at the tail of content buffer, the new entry can be written without
|
|
||||||
// link (Case 5).
|
|
||||||
const bool canAppendEntry =
|
|
||||||
contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
|
|
||||||
const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
|
|
||||||
int writingPos = newEntryPos;
|
|
||||||
// Write new entry at the tail position of the bigram content.
|
|
||||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
|
||||||
newTargetTerminalId);
|
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
|
||||||
&newBigramEntry, bigramProperty);
|
|
||||||
if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
|
|
||||||
&writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!mBigramDictContent->writeTerminator(writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!canAppendEntry) {
|
|
||||||
// Update link of the current tail entry.
|
|
||||||
if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
|
|
||||||
AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
|
|
||||||
tailEntryPos, newEntryPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (outAddedNewEntry) {
|
|
||||||
*outAddedNewEntry = true;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
|
|
||||||
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
|
||||||
if (!originalBigramEntry.isValid()) {
|
|
||||||
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
|
|
||||||
// entry is updated.
|
|
||||||
if (outAddedNewEntry) {
|
|
||||||
*outAddedNewEntry = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const BigramEntry updatedBigramEntry =
|
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
|
||||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
|
||||||
&updatedBigramEntry, bigramProperty);
|
|
||||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
|
||||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
|
||||||
// Bigram list doesn't exist.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
|
|
||||||
nullptr /* outTailEntryPos */);
|
|
||||||
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
|
||||||
// Bigram entry doesn't exist.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
|
||||||
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
|
|
||||||
// Bigram entry doesn't exist.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
|
|
||||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
||||||
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
|
||||||
int *const outBigramCount) {
|
|
||||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
|
||||||
// Bigram list doesn't exist.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool hasNext = true;
|
|
||||||
int readingPos = bigramListPos;
|
|
||||||
while (hasNext) {
|
|
||||||
const BigramEntry bigramEntry =
|
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
|
|
||||||
hasNext = bigramEntry.hasNext();
|
|
||||||
if (!bigramEntry.isValid()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
|
||||||
bigramEntry.getTargetTerminalId());
|
|
||||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
|
||||||
// Invalidate bigram entry.
|
|
||||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
||||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else if (bigramEntry.hasHistoricalInfo()) {
|
|
||||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
|
||||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
|
|
||||||
if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
|
|
||||||
const BigramEntry updatedBigramEntry =
|
|
||||||
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
|
||||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*outBigramCount += 1;
|
|
||||||
} else {
|
|
||||||
// Remove entry.
|
|
||||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
||||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
*outBigramCount += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
|
|
||||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
|
||||||
// Bigram list doesn't exist.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
int bigramCount = 0;
|
|
||||||
bool hasNext = true;
|
|
||||||
int readingPos = bigramListPos;
|
|
||||||
while (hasNext) {
|
|
||||||
const BigramEntry bigramEntry =
|
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
hasNext = bigramEntry.hasNext();
|
|
||||||
if (bigramEntry.isValid()) {
|
|
||||||
bigramCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return bigramCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
|
||||||
const int bigramListPos, int *const outTailEntryPos) const {
|
|
||||||
if (outTailEntryPos) {
|
|
||||||
*outTailEntryPos = NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
int invalidEntryPos = NOT_A_DICT_POS;
|
|
||||||
int readingPos = bigramListPos;
|
|
||||||
while (true) {
|
|
||||||
const BigramEntry bigramEntry =
|
|
||||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
|
|
||||||
if (!bigramEntry.hasNext()) {
|
|
||||||
if (outTailEntryPos) {
|
|
||||||
*outTailEntryPos = entryPos;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
|
||||||
// Entry with same target is found.
|
|
||||||
return entryPos;
|
|
||||||
} else if (!bigramEntry.isValid()) {
|
|
||||||
// Invalid entry that can be reused is found.
|
|
||||||
invalidEntryPos = entryPos;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return invalidEntryPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Builds the entry that should be stored for originalBigramEntry after applying bigramProperty.
// When the header policy says words carry historical info, the entry's historical info is
// combined with the timestamp, level and count of bigramProperty; otherwise only the probability
// of the entry is replaced by the one from bigramProperty.
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
        const BigramEntry *const originalBigramEntry,
        const BigramProperty *const bigramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
    }
    const HistoricalInfo infoFromProperty(bigramProperty->getTimestamp(),
            bigramProperty->getLevel(), bigramProperty->getCount());
    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
            originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
            &infoFromProperty, mHeaderPolicy);
    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedInfo);
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
|
@ -1,72 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_VER4_BIGRAM_LIST_POLICY_H
|
|
||||||
#define LATINIME_VER4_BIGRAM_LIST_POLICY_H
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BigramDictContent;
|
|
||||||
class BigramProperty;
|
|
||||||
class HeaderPolicy;
|
|
||||||
class TerminalPositionLookupTable;
|
|
||||||
|
|
||||||
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|
||||||
public:
|
|
||||||
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
|
|
||||||
const TerminalPositionLookupTable *const terminalPositionLookupTable,
|
|
||||||
const HeaderPolicy *const headerPolicy)
|
|
||||||
: mBigramDictContent(bigramDictContent),
|
|
||||||
mTerminalPositionLookupTable(terminalPositionLookupTable),
|
|
||||||
mHeaderPolicy(headerPolicy) {}
|
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
|
||||||
|
|
||||||
bool skipAllBigrams(int *const pos) const {
|
|
||||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
|
||||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
|
||||||
|
|
||||||
bool removeEntry(const int terminalId, const int targetTerminalId);
|
|
||||||
|
|
||||||
bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
|
||||||
int *const outBigramCount);
|
|
||||||
|
|
||||||
int getBigramEntryConut(const int terminalId);
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
|
||||||
|
|
||||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
|
|
||||||
int *const outTailEntryPos) const;
|
|
||||||
|
|
||||||
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
|
||||||
const BigramProperty *const bigramProperty) const;
|
|
||||||
|
|
||||||
BigramDictContent *const mBigramDictContent;
|
|
||||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
|
||||||
const HeaderPolicy *const mHeaderPolicy;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_VER4_BIGRAM_LIST_POLICY_H */
|
|
|
@ -1,219 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
// Link position that marks the end of a bigram list: a link entry pointing here is a terminator.
// It shares its value with Ver4DictConstants::NOT_A_TERMINAL_ID.
const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
|
||||||
|
|
||||||
// Decodes the bigram entry stored at *bigramEntryPos and advances *bigramEntryPos past it.
//   - A position outside the content buffer is logged and yields an entry with hasNext == false
//     and no target terminal.
//   - A link entry is followed: reading continues at the linked position, so on return
//     *bigramEntryPos is just behind the entry that was finally decoded.
//   - A link to INVALID_LINKED_ENTRY_POS is the list terminator and yields an entry with
//     hasNext == false and no target terminal.
//   - Any regular entry is returned with hasNext == true.
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
        int *const bigramEntryPos) const {
    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
    const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
    const bool isInsideBuffer =
            *bigramEntryPos >= 0 && bigramEntryTailPos <= buffer->getTailPosition();
    if (!isInsideBuffer) {
        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
                "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
                buffer->getTailPosition());
        ASSERT(false);
        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                Ver4DictConstants::NOT_A_TERMINAL_ID);
    }

    // Fields are laid out as: flags, then either (timestamp, level, count) or probability,
    // then the target terminal id. Every read moves *bigramEntryPos forward.
    const int flags = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
    int probability = NOT_A_PROBABILITY;
    int timestamp = NOT_A_TIMESTAMP;
    int level = 0;
    int count = 0;
    if (!mHasHistoricalInfo) {
        probability = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
    } else {
        timestamp = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
        level = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
        count = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
    }
    const int rawTargetTerminalId = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
    int targetTerminalId = rawTargetTerminalId;
    if (rawTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    }

    if ((flags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0) {
        // For a link entry the target field holds the position of the linked entry.
        if (targetTerminalId == INVALID_LINKED_ENTRY_POS) {
            // Bigram list terminator is found.
            return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                    Ver4DictConstants::NOT_A_TERMINAL_ID);
        }
        *bigramEntryPos = targetTerminalId;
        return getBigramEntryAndAdvancePosition(bigramEntryPos);
    }

    // hasNext is always true here: callers keep reading until they reach the terminator.
    if (!mHasHistoricalInfo) {
        return BigramEntry(true /* hasNext */, probability, targetTerminalId);
    }
    const HistoricalInfo historicalInfo(timestamp, level, count);
    return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
}
|
|
||||||
|
|
||||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
|
||||||
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
|
||||||
return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
|
|
||||||
bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
|
|
||||||
bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
|
|
||||||
bigramEntryToWrite->getHistoricalInfo()->getLevel(),
|
|
||||||
bigramEntryToWrite->getHistoricalInfo()->getCount(),
|
|
||||||
entryWritingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
|
|
||||||
const bool isLink, const int probability, const int targetTerminalId,
|
|
||||||
const int timestamp, const int level, const int count, int *const entryWritingPos) {
|
|
||||||
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
|
||||||
const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (mHasHistoricalInfo) {
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
|
|
||||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
|
||||||
timestamp);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(level,
|
|
||||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
|
|
||||||
level);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(count,
|
|
||||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
|
|
||||||
count);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
|
|
||||||
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
|
||||||
probability);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
|
||||||
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
|
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
|
||||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
|
|
||||||
*entryWritingPos, targetTerminalId);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
|
|
||||||
const int targetTerminalId = linkedEntryPos;
|
|
||||||
int pos = writingPos;
|
|
||||||
return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
|
|
||||||
NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
|
|
||||||
0 /* count */, &pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rebuilds this content from originalBigramDictContent, keeping only what is still useful.
// terminalIdMap maps old terminal ids (first) to new terminal ids (second). For every mapped
// terminal that has a bigram list in the original content, the list is copied to the tail of
// this content by runGCBigramList(); when at least one entry was copied, the list is registered
// under the new terminal id and the number of copied entries is added to *outBigramEntryCount.
// Returns false when a list cannot be copied or registered.
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
        const BigramDictContent *const originalBigramDictContent,
        int *const outBigramEntryCount) {
    for (const auto &idPair : *terminalIdMap) {
        const int oldTerminalId = idPair.first;
        const int originalBigramListPos =
                originalBigramDictContent->getBigramListHeadPos(oldTerminalId);
        if (originalBigramListPos == NOT_A_DICT_POS) {
            // Nothing to copy for a terminal without a bigram list.
            continue;
        }
        // The copied list starts at the current tail of this content.
        const int bigramListPos = getContentBuffer()->getTailPosition();
        int copiedEntryCount = 0;
        const bool copied = runGCBigramList(originalBigramListPos, originalBigramDictContent,
                bigramListPos, terminalIdMap, &copiedEntryCount);
        if (!copied) {
            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
                    originalBigramListPos, bigramListPos);
            return false;
        }
        if (copiedEntryCount == 0) {
            // Every entry was useless, so this terminal ends up without a bigram list.
            continue;
        }
        *outBigramEntryCount += copiedEntryCount;
        // Register the copied list under the new terminal id.
        if (!getUpdatableAddressLookupTable()->set(idPair.second, bigramListPos)) {
            AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
                    idPair.second, bigramListPos);
            return false;
        }
    }
    return true;
}
|
|
||||||
|
|
||||||
// Returns whether GC for the bigram list was succeeded or not.
|
|
||||||
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
|
||||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
|
||||||
int *const outEntryCount) {
|
|
||||||
bool hasNext = true;
|
|
||||||
int readingPos = bigramListPos;
|
|
||||||
int writingPos = toPos;
|
|
||||||
while (hasNext) {
|
|
||||||
const BigramEntry originalBigramEntry =
|
|
||||||
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
hasNext = originalBigramEntry.hasNext();
|
|
||||||
if (!originalBigramEntry.isValid()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
|
||||||
terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
|
|
||||||
if (it == terminalIdMap->end()) {
|
|
||||||
// Target word has been removed.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const BigramEntry updatedBigramEntry =
|
|
||||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
|
|
||||||
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*outEntryCount += 1;
|
|
||||||
}
|
|
||||||
if (*outEntryCount > 0) {
|
|
||||||
if (!writeTerminator(writingPos)) {
|
|
||||||
AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
|
@ -1,128 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
|
|
||||||
#define LATINIME_BIGRAM_DICT_CONTENT_H
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class ReadWriteByteArrayView;
|
|
||||||
|
|
||||||
class BigramDictContent : public SparseTableDictContent {
|
|
||||||
public:
|
|
||||||
BigramDictContent(const ReadWriteByteArrayView *const buffers, const bool hasHistoricalInfo)
|
|
||||||
: SparseTableDictContent(buffers, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
|
||||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
|
||||||
|
|
||||||
BigramDictContent(const bool hasHistoricalInfo)
|
|
||||||
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
|
||||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
|
||||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
|
||||||
|
|
||||||
int getContentTailPos() const {
|
|
||||||
return getContentBuffer()->getTailPosition();
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
|
|
||||||
int readingPos = bigramEntryPos;
|
|
||||||
return getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
|
|
||||||
|
|
||||||
// Returns head position of bigram list for a PtNode specified by terminalId.
|
|
||||||
int getBigramListHeadPos(const int terminalId) const {
|
|
||||||
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
|
||||||
if (!addressLookupTable->contains(terminalId)) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return addressLookupTable->get(terminalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
|
|
||||||
int writingPos = getContentBuffer()->getTailPosition();
|
|
||||||
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
|
|
||||||
int writingPos = entryWritingPos;
|
|
||||||
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
|
|
||||||
int *const entryWritingPos);
|
|
||||||
|
|
||||||
bool writeTerminator(const int writingPos) {
|
|
||||||
// Terminator is a link to the invalid position.
|
|
||||||
return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool writeLink(const int linkedPos, const int writingPos);
|
|
||||||
|
|
||||||
bool createNewBigramList(const int terminalId) {
|
|
||||||
const int bigramListPos = getContentBuffer()->getTailPosition();
|
|
||||||
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool flushToFile(FILE *const file) const {
|
|
||||||
return flush(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
|
||||||
const BigramDictContent *const originalBigramDictContent,
|
|
||||||
int *const outBigramEntryCount);
|
|
||||||
|
|
||||||
int getBigramEntrySize() const {
|
|
||||||
if (mHasHistoricalInfo) {
|
|
||||||
return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
|
|
||||||
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
|
|
||||||
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
|
|
||||||
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE
|
|
||||||
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
|
||||||
} else {
|
|
||||||
return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
|
|
||||||
+ Ver4DictConstants::PROBABILITY_SIZE
|
|
||||||
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
|
||||||
|
|
||||||
static const int INVALID_LINKED_ENTRY_POS;
|
|
||||||
|
|
||||||
bool writeBigramEntryAttributesAndAdvancePosition(
|
|
||||||
const bool isLink, const int probability, const int targetTerminalId,
|
|
||||||
const int timestamp, const int level, const int count, int *const entryWritingPos);
|
|
||||||
|
|
||||||
bool runGCBigramList(const int bigramListPos,
|
|
||||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
|
||||||
int *const outEntryCount);
|
|
||||||
|
|
||||||
bool mHasHistoricalInfo;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
|
|
|
@ -1,99 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_BIGRAM_ENTRY_H
|
|
||||||
#define LATINIME_BIGRAM_ENTRY_H
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BigramEntry {
|
|
||||||
public:
|
|
||||||
BigramEntry(const BigramEntry& bigramEntry)
|
|
||||||
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
|
|
||||||
mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
|
|
||||||
|
|
||||||
// Entry with historical information.
|
|
||||||
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
|
|
||||||
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
|
|
||||||
mTargetTerminalId(targetTerminalId) {}
|
|
||||||
|
|
||||||
// Entry with historical information.
|
|
||||||
BigramEntry(const bool hasNext, const int probability,
|
|
||||||
const HistoricalInfo *const historicalInfo, const int targetTerminalId)
|
|
||||||
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
|
|
||||||
mTargetTerminalId(targetTerminalId) {}
|
|
||||||
|
|
||||||
const BigramEntry getInvalidatedEntry() const {
|
|
||||||
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
|
|
||||||
return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
|
|
||||||
return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
|
|
||||||
return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
const BigramEntry updateHistoricalInfoAndGetEntry(
|
|
||||||
const HistoricalInfo *const historicalInfo) const {
|
|
||||||
return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isValid() const {
|
|
||||||
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool hasNext() const {
|
|
||||||
return mHasNext;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
|
||||||
return mProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool hasHistoricalInfo() const {
|
|
||||||
return mHistoricalInfo.isValid();
|
|
||||||
}
|
|
||||||
|
|
||||||
const HistoricalInfo *getHistoricalInfo() const {
|
|
||||||
return &mHistoricalInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getTargetTerminalId() const {
|
|
||||||
return mTargetTerminalId;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Copy constructor is public to use this class as a type of return value.
|
|
||||||
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
|
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
|
|
||||||
|
|
||||||
const bool mHasNext;
|
|
||||||
const int mProbability;
|
|
||||||
const HistoricalInfo mHistoricalInfo;
|
|
||||||
const int mTargetTerminalId;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_BIGRAM_ENTRY_H */
|
|
|
@ -159,11 +159,6 @@ bool Ver4DictBuffers::flushDictBuffers(FILE *const file) const {
|
||||||
AKLOGE("Language model dict content cannot be written.");
|
AKLOGE("Language model dict content cannot be written.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Write bigram dict content.
|
|
||||||
if (!mBigramDictContent.flushToFile(file)) {
|
|
||||||
AKLOGE("Bigram dict content cannot be written.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Write shortcut dict content.
|
// Write shortcut dict content.
|
||||||
if (!mShortcutDictContent.flushToFile(file)) {
|
if (!mShortcutDictContent.flushToFile(file)) {
|
||||||
AKLOGE("Shortcut dict content cannot be written.");
|
AKLOGE("Shortcut dict content cannot be written.");
|
||||||
|
@ -186,8 +181,6 @@ Ver4DictBuffers::Ver4DictBuffers(MmappedBuffer::MmappedBufferPtr &&headerBuffer,
|
||||||
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
contentBuffers[Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX]),
|
||||||
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
mLanguageModelDictContent(contentBuffers[Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX],
|
||||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
||||||
mBigramDictContent(&contentBuffers[Ver4DictConstants::BIGRAM_BUFFERS_INDEX],
|
|
||||||
mHeaderPolicy.hasHistoricalInfoOfWords()),
|
|
||||||
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
mShortcutDictContent(&contentBuffers[Ver4DictConstants::SHORTCUT_BUFFERS_INDEX]),
|
||||||
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
mIsUpdatable(mDictBuffer->isUpdatable()) {}
|
||||||
|
|
||||||
|
@ -196,7 +189,6 @@ Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const i
|
||||||
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
|
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
|
||||||
mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
|
mLanguageModelDictContent(headerPolicy->hasHistoricalInfoOfWords()),
|
||||||
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
|
mShortcutDictContent(), mIsUpdatable(true) {}
|
||||||
mIsUpdatable(true) {}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
|
||||||
|
@ -53,7 +52,6 @@ class Ver4DictBuffers {
|
||||||
return mExpandableTrieBuffer.isNearSizeLimit()
|
return mExpandableTrieBuffer.isNearSizeLimit()
|
||||||
|| mTerminalPositionLookupTable.isNearSizeLimit()
|
|| mTerminalPositionLookupTable.isNearSizeLimit()
|
||||||
|| mLanguageModelDictContent.isNearSizeLimit()
|
|| mLanguageModelDictContent.isNearSizeLimit()
|
||||||
|| mBigramDictContent.isNearSizeLimit()
|
|
||||||
|| mShortcutDictContent.isNearSizeLimit();
|
|| mShortcutDictContent.isNearSizeLimit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,14 +87,6 @@ class Ver4DictBuffers {
|
||||||
return &mLanguageModelDictContent;
|
return &mLanguageModelDictContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
|
|
||||||
return &mBigramDictContent;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
|
|
||||||
return &mBigramDictContent;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
|
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
|
||||||
return &mShortcutDictContent;
|
return &mShortcutDictContent;
|
||||||
}
|
}
|
||||||
|
@ -135,7 +125,6 @@ class Ver4DictBuffers {
|
||||||
BufferWithExtendableBuffer mExpandableTrieBuffer;
|
BufferWithExtendableBuffer mExpandableTrieBuffer;
|
||||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||||
LanguageModelDictContent mLanguageModelDictContent;
|
LanguageModelDictContent mLanguageModelDictContent;
|
||||||
BigramDictContent mBigramDictContent;
|
|
||||||
ShortcutDictContent mShortcutDictContent;
|
ShortcutDictContent mShortcutDictContent;
|
||||||
const int mIsUpdatable;
|
const int mIsUpdatable;
|
||||||
};
|
};
|
||||||
|
|
|
@ -29,20 +29,18 @@ const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
|
||||||
|
|
||||||
// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
|
// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
|
||||||
// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
|
// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
|
||||||
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
|
// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for shortcut.
|
||||||
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
|
const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
|
||||||
NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
|
NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
|
||||||
+ NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
|
+ NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
|
||||||
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
|
+ NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
|
||||||
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
|
const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
|
||||||
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
|
const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
|
||||||
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
|
const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
|
||||||
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
|
||||||
const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
|
|
||||||
LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
|
|
||||||
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
|
const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
|
||||||
BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
|
LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
|
||||||
|
|
||||||
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
|
@ -56,21 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
|
||||||
|
|
||||||
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
|
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
|
||||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
|
||||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
|
||||||
// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
|
|
||||||
// invalid terminal ID in bigram lists.
|
|
||||||
const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
|
|
||||||
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
|
|
||||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
|
||||||
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
|
||||||
const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
|
|
||||||
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
|
|
||||||
|
|
||||||
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||||
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
|
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
|
||||||
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
||||||
|
|
|
@ -52,19 +52,9 @@ class Ver4DictConstants {
|
||||||
// Flags in probability entry.
|
// Flags in probability entry.
|
||||||
static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
|
static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
|
||||||
|
|
||||||
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
|
|
||||||
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||||
|
|
||||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
|
||||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
|
||||||
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
|
|
||||||
static const int BIGRAM_IS_LINK_MASK;
|
|
||||||
static const int BIGRAM_PROBABILITY_MASK;
|
|
||||||
// Used when bigram list has time stamp.
|
|
||||||
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
|
|
||||||
|
|
||||||
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
||||||
static const int SHORTCUT_PROBABILITY_MASK;
|
static const int SHORTCUT_PROBABILITY_MASK;
|
||||||
static const int SHORTCUT_HAS_NEXT_MASK;
|
static const int SHORTCUT_HAS_NEXT_MASK;
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
@ -230,12 +229,6 @@ bool Ver4PatriciaTrieNodeWriter::addNgramEntry(const WordIdArrayView prevWordIds
|
||||||
if (!probabilityEntry.isValid() && outAddedNewBigram) {
|
if (!probabilityEntry.isValid() && outAddedNewBigram) {
|
||||||
*outAddedNewBigram = true;
|
*outAddedNewBigram = true;
|
||||||
}
|
}
|
||||||
// TODO: Remove.
|
|
||||||
if (!mBigramPolicy->addNewEntry(prevWordIds[0], wordId, bigramProperty, outAddedNewBigram)) {
|
|
||||||
AKLOGE("Cannot add new bigram entry. prevWordId: %d, wordId: %d",
|
|
||||||
prevWordIds[0], wordId);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,19 +237,15 @@ bool Ver4PatriciaTrieNodeWriter::removeNgramEntry(const WordIdArrayView prevWord
|
||||||
// TODO: Support n-gram.
|
// TODO: Support n-gram.
|
||||||
LanguageModelDictContent *const languageModelDictContent =
|
LanguageModelDictContent *const languageModelDictContent =
|
||||||
mBuffers->getMutableLanguageModelDictContent();
|
mBuffers->getMutableLanguageModelDictContent();
|
||||||
if (!languageModelDictContent->removeNgramProbabilityEntry(prevWordIds.limit(1 /* maxSize */),
|
return languageModelDictContent->removeNgramProbabilityEntry(prevWordIds.limit(1 /* maxSize */),
|
||||||
wordId)) {
|
wordId);
|
||||||
// TODO: Uncomment.
|
|
||||||
// return false;
|
|
||||||
}
|
|
||||||
// TODO: Remove.
|
|
||||||
return mBigramPolicy->removeEntry(prevWordIds[0], wordId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Remove when we stop supporting v402 format.
|
||||||
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
|
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
|
||||||
return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
|
// Do nothing.
|
||||||
sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
|
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
|
||||||
|
@ -291,12 +280,6 @@ bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
|
||||||
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
|
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Counts bigram entries.
|
|
||||||
if (outBigramEntryCount) {
|
|
||||||
*outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
|
|
||||||
toBeUpdatedPtNodeParams->getTerminalId());
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,6 @@ namespace latinime {
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
class BufferWithExtendableBuffer;
|
||||||
class HeaderPolicy;
|
class HeaderPolicy;
|
||||||
class Ver4BigramListPolicy;
|
|
||||||
class Ver4DictBuffers;
|
class Ver4DictBuffers;
|
||||||
class Ver4PatriciaTrieNodeReader;
|
class Ver4PatriciaTrieNodeReader;
|
||||||
class Ver4PtNodeArrayReader;
|
class Ver4PtNodeArrayReader;
|
||||||
|
@ -42,10 +41,9 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
|
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
|
||||||
const PtNodeReader *const ptNodeReader,
|
const PtNodeReader *const ptNodeReader,
|
||||||
const PtNodeArrayReader *const ptNodeArrayReader,
|
const PtNodeArrayReader *const ptNodeArrayReader,
|
||||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||||
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
||||||
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
mReadingHelper(ptNodeReader, ptNodeArrayReader), mShortcutPolicy(shortcutPolicy) {}
|
||||||
mShortcutPolicy(shortcutPolicy) {}
|
|
||||||
|
|
||||||
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
||||||
|
|
||||||
|
@ -114,7 +112,6 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
Ver4DictBuffers *const mBuffers;
|
Ver4DictBuffers *const mBuffers;
|
||||||
const HeaderPolicy *const mHeaderPolicy;
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
DynamicPtReadingHelper mReadingHelper;
|
DynamicPtReadingHelper mReadingHelper;
|
||||||
Ver4BigramListPolicy *const mBigramPolicy;
|
|
||||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -23,7 +23,6 @@
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
@ -42,14 +41,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
|
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
|
||||||
: mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
|
: mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
|
||||||
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
||||||
mBigramPolicy(mBuffers->getMutableBigramDictContent(),
|
|
||||||
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
|
|
||||||
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable()),
|
mBuffers->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
|
mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
|
||||||
mPtNodeArrayReader(mDictBuffer),
|
mPtNodeArrayReader(mDictBuffer),
|
||||||
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||||
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
|
&mPtNodeArrayReader, &mShortcutPolicy),
|
||||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||||
mWritingHelper(mBuffers.get()),
|
mWritingHelper(mBuffers.get()),
|
||||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||||
|
@ -132,7 +129,6 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy *const mHeaderPolicy;
|
const HeaderPolicy *const mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer *const mDictBuffer;
|
BufferWithExtendableBuffer *const mDictBuffer;
|
||||||
Ver4BigramListPolicy mBigramPolicy;
|
|
||||||
Ver4ShortcutListPolicy mShortcutPolicy;
|
Ver4ShortcutListPolicy mShortcutPolicy;
|
||||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
Ver4PtNodeArrayReader mPtNodeArrayReader;
|
Ver4PtNodeArrayReader mPtNodeArrayReader;
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
|
@ -77,13 +76,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||||
mBuffers->getLanguageModelDictContent(), headerPolicy);
|
mBuffers->getLanguageModelDictContent(), headerPolicy);
|
||||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||||
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
|
|
||||||
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
|
|
||||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||||
mBuffers->getTerminalPositionLookupTable());
|
mBuffers->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||||
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
|
||||||
&shortcutPolicy);
|
|
||||||
|
|
||||||
int entryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
|
int entryCountTable[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
|
||||||
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntries(headerPolicy,
|
if (!mBuffers->getMutableLanguageModelDictContent()->updateAllProbabilityEntries(headerPolicy,
|
||||||
|
@ -118,16 +114,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
|
||||||
.getValidUnigramCount();
|
|
||||||
const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
|
|
||||||
if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
|
|
||||||
if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
|
|
||||||
AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
|
|
||||||
maxUnigramCount);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
|
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||||
|
@ -136,21 +122,12 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
&traversePolicyToUpdateBigramProbability)) {
|
&traversePolicyToUpdateBigramProbability)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
|
|
||||||
const int maxBigramCount = headerPolicy->getMaxBigramCount();
|
|
||||||
if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
|
|
||||||
if (!truncateBigrams(maxBigramCount)) {
|
|
||||||
AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
||||||
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &shortcutPolicy);
|
||||||
&shortcutPolicy);
|
|
||||||
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
||||||
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
||||||
|
@ -163,12 +140,10 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||||
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
|
buffersToWrite->getLanguageModelDictContent(), headerPolicy);
|
||||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||||
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
|
|
||||||
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
|
|
||||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||||
buffersToWrite->getTerminalPositionLookupTable());
|
buffersToWrite->getTerminalPositionLookupTable());
|
||||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||||
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader,
|
||||||
&newShortcutPolicy);
|
&newShortcutPolicy);
|
||||||
// Re-assign terminal IDs for valid terminal PtNodes.
|
// Re-assign terminal IDs for valid terminal PtNodes.
|
||||||
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
||||||
|
@ -181,11 +156,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
|
mBuffers->getLanguageModelDictContent(), nullptr /* outNgramCount */)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Run GC for bigram dict content.
|
|
||||||
if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
|
|
||||||
mBuffers->getBigramDictContent(), outBigramCount)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Run GC for shortcut dict content.
|
// Run GC for shortcut dict content.
|
||||||
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
|
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
|
||||||
mBuffers->getShortcutDictContent())) {
|
mBuffers->getShortcutDictContent())) {
|
||||||
|
@ -213,93 +183,6 @@ bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove.
|
|
||||||
bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
|
||||||
const Ver4PatriciaTrieNodeReader *const ptNodeReader,
|
|
||||||
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
|
|
||||||
const TerminalPositionLookupTable *const terminalPosLookupTable =
|
|
||||||
mBuffers->getTerminalPositionLookupTable();
|
|
||||||
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
|
|
||||||
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
|
|
||||||
priorityQueue;
|
|
||||||
for (int i = 0; i < nextTerminalId; ++i) {
|
|
||||||
const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
|
|
||||||
if (terminalPos == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const ProbabilityEntry probabilityEntry =
|
|
||||||
mBuffers->getLanguageModelDictContent()->getProbabilityEntry(i);
|
|
||||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
|
||||||
ForgettingCurveUtils::decodeProbability(
|
|
||||||
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
|
||||||
probabilityEntry.getProbability();
|
|
||||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
|
||||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete unigrams.
|
|
||||||
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
|
|
||||||
const int ptNodePos = priorityQueue.top().getDictPos();
|
|
||||||
priorityQueue.pop();
|
|
||||||
const PtNodeParams ptNodeParams =
|
|
||||||
ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
|
|
||||||
if (ptNodeParams.representsNonWordInfo()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
|
|
||||||
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Remove.
|
|
||||||
bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
|
||||||
const TerminalPositionLookupTable *const terminalPosLookupTable =
|
|
||||||
mBuffers->getTerminalPositionLookupTable();
|
|
||||||
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
|
|
||||||
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
|
|
||||||
priorityQueue;
|
|
||||||
BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
|
|
||||||
for (int i = 0; i < nextTerminalId; ++i) {
|
|
||||||
const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
|
|
||||||
if (bigramListPos == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bool hasNext = true;
|
|
||||||
int readingPos = bigramListPos;
|
|
||||||
while (hasNext) {
|
|
||||||
const BigramEntry bigramEntry =
|
|
||||||
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
|
||||||
const int entryPos = readingPos - bigramDictContent->getBigramEntrySize();
|
|
||||||
hasNext = bigramEntry.hasNext();
|
|
||||||
if (!bigramEntry.isValid()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
|
||||||
ForgettingCurveUtils::decodeProbability(
|
|
||||||
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
|
||||||
bigramEntry.getProbability();
|
|
||||||
priorityQueue.push(DictProbability(entryPos, probability,
|
|
||||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete bigrams.
|
|
||||||
while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
|
|
||||||
const int entryPos = priorityQueue.top().getDictPos();
|
|
||||||
const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
|
|
||||||
const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
|
||||||
if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
|
|
||||||
AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
priorityQueue.pop();
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
||||||
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||||
if (!ptNodeParams->isTerminal()) {
|
if (!ptNodeParams->isTerminal()) {
|
||||||
|
|
|
@ -66,49 +66,6 @@ class Ver4PatriciaTrieWritingHelper {
|
||||||
const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
|
const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
|
||||||
};
|
};
|
||||||
|
|
||||||
// For truncateUnigrams() and truncateBigrams().
|
|
||||||
class DictProbability {
|
|
||||||
public:
|
|
||||||
DictProbability(const int dictPos, const int probability, const int timestamp)
|
|
||||||
: mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
|
|
||||||
|
|
||||||
int getDictPos() const {
|
|
||||||
return mDictPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
|
||||||
return mProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getTimestamp() const {
|
|
||||||
return mTimestamp;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
|
|
||||||
|
|
||||||
int mDictPos;
|
|
||||||
int mProbability;
|
|
||||||
int mTimestamp;
|
|
||||||
};
|
|
||||||
|
|
||||||
// For truncateUnigrams() and truncateBigrams().
|
|
||||||
class DictProbabilityComparator {
|
|
||||||
public:
|
|
||||||
bool operator()(const DictProbability &left, const DictProbability &right) {
|
|
||||||
if (left.getProbability() != right.getProbability()) {
|
|
||||||
return left.getProbability() > right.getProbability();
|
|
||||||
}
|
|
||||||
if (left.getTimestamp() != right.getTimestamp()) {
|
|
||||||
return left.getTimestamp() < right.getTimestamp();
|
|
||||||
}
|
|
||||||
return left.getDictPos() > right.getDictPos();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
|
|
||||||
};
|
|
||||||
|
|
||||||
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
||||||
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
|
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
|
||||||
int *const outBigramCount);
|
int *const outBigramCount);
|
||||||
|
|
Loading…
Reference in New Issue