Merge "Generate dict code for version 401."
This commit is contained in:
commit
728141d55a
36 changed files with 4660 additions and 0 deletions
|
@ -13,6 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
LATIN_IME_CORE_SRC_FILES :=
|
||||
LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 :=
|
||||
LATIN_IME_CORE_TEST_FILES :=
|
||||
LATIN_IME_JNI_SRC_FILES :=
|
||||
LATIN_IME_SRC_DIR :=
|
||||
|
|
|
@ -98,6 +98,27 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
log_utils.cpp \
|
||||
time_keeper.cpp)
|
||||
|
||||
# Sources under structure/backward/v401/ (including its content/ and bigram/ subdirectories).
# The list is appended to LATIN_IME_CORE_SRC_FILES right after this definition.
LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 := \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/, \
|
||||
ver4_dict_buffers.cpp \
|
||||
ver4_dict_constants.cpp \
|
||||
ver4_patricia_trie_node_reader.cpp \
|
||||
ver4_patricia_trie_node_writer.cpp \
|
||||
ver4_patricia_trie_policy.cpp \
|
||||
ver4_patricia_trie_reading_utils.cpp \
|
||||
ver4_patricia_trie_writing_helper.cpp \
|
||||
ver4_pt_node_array_reader.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/content/, \
|
||||
bigram_dict_content.cpp \
|
||||
probability_dict_content.cpp \
|
||||
shortcut_dict_content.cpp \
|
||||
sparse_table_dict_content.cpp \
|
||||
terminal_position_lookup_table.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/bigram/, \
|
||||
ver4_bigram_list_policy.cpp)
|
||||
|
||||
LATIN_IME_CORE_SRC_FILES += $(LATIN_IME_CORE_SRC_FILES_BACKWARD_V401)
|
||||
|
||||
LATIN_IME_CORE_TEST_FILES := \
|
||||
defines_test.cpp \
|
||||
suggest/core/layout/normal_distribution_2d_test.cpp \
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Files under this directory have been auto-generated.
|
|
@ -0,0 +1,290 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
|
||||
* Do not edit this file other than updating policy's interface.
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
|
||||
|
||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
|
||||
if (outBigramPos) {
|
||||
// Lookup target PtNode position.
|
||||
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
||||
bigramEntry.getTargetTerminalId());
|
||||
}
|
||||
if (outProbability) {
|
||||
if (bigramEntry.hasHistoricalInfo()) {
|
||||
*outProbability =
|
||||
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
|
||||
mHeaderPolicy);
|
||||
} else {
|
||||
*outProbability = bigramEntry.getProbability();
|
||||
}
|
||||
}
|
||||
if (outHasNext) {
|
||||
*outHasNext = bigramEntry.hasNext();
|
||||
}
|
||||
}
|
||||
|
||||
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
|
||||
// 1. The word has no bigrams yet.
|
||||
// 2. The word has bigrams, and there is the target in the list.
|
||||
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
|
||||
// 4. The word has bigrams. We have to append new bigram entry to the list.
|
||||
// 5. Same as 4, but the list is the last entry of the content file.
|
||||
if (outAddedNewEntry) {
|
||||
*outAddedNewEntry = false;
|
||||
}
|
||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (bigramListPos == NOT_A_DICT_POS) {
|
||||
// Case 1. PtNode that doesn't have a bigram list.
|
||||
// Create new bigram list.
|
||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||
return false;
|
||||
}
|
||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||
newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
|
||||
bigramProperty);
|
||||
// Write an entry.
|
||||
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (outAddedNewEntry) {
|
||||
*outAddedNewEntry = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int tailEntryPos = NOT_A_DICT_POS;
|
||||
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
|
||||
&tailEntryPos);
|
||||
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
|
||||
// Case 4, 5.
|
||||
// Add new entry to the bigram list.
|
||||
if (tailEntryPos == NOT_A_DICT_POS) {
|
||||
// Case 4. Create new bigram list.
|
||||
if (!mBigramDictContent->createNewBigramList(terminalId)) {
|
||||
return false;
|
||||
}
|
||||
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
// Copy existing bigram list.
|
||||
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Write new entry at the tail position of the bigram content.
|
||||
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
|
||||
newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||
&newBigramEntry, bigramProperty);
|
||||
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
|
||||
return false;
|
||||
}
|
||||
// Update has next flag of the tail entry.
|
||||
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
|
||||
return false;
|
||||
}
|
||||
if (outAddedNewEntry) {
|
||||
*outAddedNewEntry = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
|
||||
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
||||
if (!originalBigramEntry.isValid()) {
|
||||
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
|
||||
// entry is updated.
|
||||
if (outAddedNewEntry) {
|
||||
*outAddedNewEntry = true;
|
||||
}
|
||||
}
|
||||
const BigramEntry updatedBigramEntry =
|
||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
|
||||
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
|
||||
&updatedBigramEntry, bigramProperty);
|
||||
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
|
||||
}
|
||||
|
||||
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
|
||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (bigramListPos == NOT_A_DICT_POS) {
|
||||
// Bigram list doesn't exist.
|
||||
return false;
|
||||
}
|
||||
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
|
||||
nullptr /* outTailEntryPos */);
|
||||
if (entryPosToUpdate == NOT_A_DICT_POS) {
|
||||
// Bigram entry doesn't exist.
|
||||
return false;
|
||||
}
|
||||
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
|
||||
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
|
||||
// Bigram entry doesn't exist.
|
||||
return false;
|
||||
}
|
||||
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
|
||||
}
|
||||
|
||||
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
||||
int *const outBigramCount) {
|
||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (bigramListPos == NOT_A_DICT_POS) {
|
||||
// Bigram list doesn't exist.
|
||||
return true;
|
||||
}
|
||||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (!bigramEntry.isValid()) {
|
||||
continue;
|
||||
}
|
||||
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
|
||||
bigramEntry.getTargetTerminalId());
|
||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||
// Invalidate bigram entry.
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
} else if (bigramEntry.hasHistoricalInfo()) {
|
||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
|
||||
const BigramEntry updatedBigramEntry =
|
||||
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
*outBigramCount += 1;
|
||||
} else {
|
||||
// Remove entry.
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*outBigramCount += 1;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
|
||||
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
|
||||
if (bigramListPos == NOT_A_DICT_POS) {
|
||||
// Bigram list doesn't exist.
|
||||
return 0;
|
||||
}
|
||||
int bigramCount = 0;
|
||||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (bigramEntry.isValid()) {
|
||||
bigramCount++;
|
||||
}
|
||||
}
|
||||
return bigramCount;
|
||||
}
|
||||
|
||||
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
|
||||
const int bigramListPos, int *const outTailEntryPos) const {
|
||||
if (outTailEntryPos) {
|
||||
*outTailEntryPos = NOT_A_DICT_POS;
|
||||
}
|
||||
bool hasNext = true;
|
||||
int invalidEntryPos = NOT_A_DICT_POS;
|
||||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
const BigramEntry bigramEntry =
|
||||
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
|
||||
// Entry with same target is found.
|
||||
return entryPos;
|
||||
} else if (!bigramEntry.isValid()) {
|
||||
// Invalid entry that can be reused is found.
|
||||
invalidEntryPos = entryPos;
|
||||
}
|
||||
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
|
||||
if (outTailEntryPos) {
|
||||
*outTailEntryPos = entryPos;
|
||||
}
|
||||
}
|
||||
}
|
||||
return invalidEntryPos;
|
||||
}
|
||||
|
||||
// Builds the entry to be written for originalBigramEntry after applying bigramProperty. When the
// header policy reports historical info, the historical info is updated; otherwise only the
// probability is replaced.
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
        const BigramEntry *const originalBigramEntry,
        const BigramProperty *const bigramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        return originalBigramEntry->updateProbabilityAndGetEntry(
                bigramProperty->getProbability());
    }
    const HistoricalInfo infoFromProperty(bigramProperty->getTimestamp(),
            bigramProperty->getLevel(), bigramProperty->getCount());
    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
            originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
            &infoFromProperty, mHeaderPolicy);
    return originalBigramEntry->updateHistoricalInfoAndGetEntry(&mergedInfo);
}
|
||||
|
||||
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
|
||||
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
|
||||
const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
|
||||
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
|
||||
* Do not edit this file other than updating policy's interface.
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class BigramDictContent;
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class BigramProperty;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class HeaderPolicy;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
class TerminalPositionLookupTable;
|
||||
|
||||
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||
public:
|
||||
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
|
||||
const TerminalPositionLookupTable *const terminalPositionLookupTable,
|
||||
const HeaderPolicy *const headerPolicy)
|
||||
: mBigramDictContent(bigramDictContent),
|
||||
mTerminalPositionLookupTable(terminalPositionLookupTable),
|
||||
mHeaderPolicy(headerPolicy) {}
|
||||
|
||||
void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const bigramEntryPos) const;
|
||||
|
||||
void skipAllBigrams(int *const pos) const {
|
||||
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
|
||||
}
|
||||
|
||||
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
|
||||
|
||||
bool removeEntry(const int terminalId, const int targetTerminalId);
|
||||
|
||||
bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
|
||||
int *const outBigramCount);
|
||||
|
||||
int getBigramEntryConut(const int terminalId);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
|
||||
|
||||
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
|
||||
int *const outTailEntryPos) const;
|
||||
|
||||
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
|
||||
const BigramProperty *const bigramProperty) const;
|
||||
|
||||
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
|
||||
|
||||
BigramDictContent *const mBigramDictContent;
|
||||
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H */
|
|
@ -0,0 +1,224 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Decodes the bigram entry stored at *bigramEntryPos in the content buffer; the position is
// passed to every buffer read so it ends up after the entry. An out-of-range position is logged,
// asserted on, and answered with an entry whose target is NOT_A_TERMINAL_ID.
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
        int *const bigramEntryPos) const {
    const BufferWithExtendableBuffer *const buffer = getContentBuffer();
    if (*bigramEntryPos < 0 || *bigramEntryPos >= buffer->getTailPosition()) {
        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d",
                *bigramEntryPos, buffer->getTailPosition());
        ASSERT(false);
        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
                Ver4DictConstants::NOT_A_TERMINAL_ID);
    }
    const int bigramFlags = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
    const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;

    int probability = NOT_A_PROBABILITY;
    int timestamp = NOT_A_TIMESTAMP;
    int level = 0;
    int count = 0;
    if (!mHasHistoricalInfo) {
        // Without historical info the probability is packed into the flags.
        probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
    } else {
        // With historical info, probability, timestamp, level and count follow the flags.
        probability = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
        timestamp = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
        level = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
        count = buffer->readUintAndAdvancePosition(
                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
    }

    const int storedTargetTerminalId = buffer->readUintAndAdvancePosition(
            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
    // The stored "invalid" marker is mapped back to NOT_A_TERMINAL_ID.
    int targetTerminalId = storedTargetTerminalId;
    if (storedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) {
        targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    }

    if (!mHasHistoricalInfo) {
        return BigramEntry(hasNext, probability, targetTerminalId);
    }
    const HistoricalInfo historicalInfo(timestamp, level, count);
    return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
}
|
||||
|
||||
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
|
||||
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
|
||||
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
|
||||
const int bigramFlags = createAndGetBigramFlags(
|
||||
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
|
||||
bigramEntryToWrite->hasNext());
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
||||
return false;
|
||||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
|
||||
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
|
||||
bigramEntryToWrite->getProbability());
|
||||
return false;
|
||||
}
|
||||
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
|
||||
historicalInfo->getTimeStamp());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
|
||||
historicalInfo->getLevel());
|
||||
return false;
|
||||
}
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
|
||||
historicalInfo->getCount());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const int targetTerminalIdToWrite =
|
||||
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
|
||||
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
|
||||
bigramEntryToWrite->getTargetTerminalId();
|
||||
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
|
||||
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
|
||||
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
|
||||
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
|
||||
int *const outTailEntryPos) {
|
||||
int readingPos = bigramListPos;
|
||||
int writingPos = toPos;
|
||||
bool hasNext = true;
|
||||
while (hasNext) {
|
||||
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (!hasNext) {
|
||||
*outTailEntryPos = writingPos;
|
||||
}
|
||||
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
|
||||
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Rebuilds this content from originalBigramDictContent. For every (key, value) pair of
// terminalIdMap, the bigram list registered under the key in the original content is copied with
// GC to the tail of this content and registered under the value. Lists that end up without any
// entry are not registered. *outBigramEntryCount is increased by the number of copied entries.
// Returns false when copying or registering a list fails.
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
        const BigramDictContent *const originalBigramDictContent,
        int *const outBigramEntryCount) {
    for (auto idPair = terminalIdMap->begin(); idPair != terminalIdMap->end(); ++idPair) {
        const int sourceListPos =
                originalBigramDictContent->getBigramListHeadPos(idPair->first);
        if (sourceListPos == NOT_A_DICT_POS) {
            // This terminal does not have a bigram list.
            continue;
        }
        // The collected list is appended at the current tail of this content.
        const int destListPos = getContentBuffer()->getTailPosition();
        int copiedEntryCount = 0;
        if (!runGCBigramList(sourceListPos, originalBigramDictContent, destListPos,
                terminalIdMap, &copiedEntryCount)) {
            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
                    sourceListPos, destListPos);
            return false;
        }
        if (copiedEntryCount == 0) {
            // All bigram entries are useless. This terminal does not have a bigram list.
            continue;
        }
        *outBigramEntryCount += copiedEntryCount;
        // Register the position of the list in the lookup table.
        if (!getUpdatableAddressLookupTable()->set(idPair->second, destListPos)) {
            AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
                    idPair->second, destListPos);
            return false;
        }
    }
    return true;
}
|
||||
|
||||
// Returns whether GC for the bigram list was succeeded or not.
|
||||
bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
int *const outEntrycount) {
|
||||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
int writingPos = toPos;
|
||||
int lastEntryPos = NOT_A_DICT_POS;
|
||||
while (hasNext) {
|
||||
const BigramEntry originalBigramEntry =
|
||||
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = originalBigramEntry.hasNext();
|
||||
if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
continue;
|
||||
}
|
||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
||||
terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
|
||||
if (it == terminalIdMap->end()) {
|
||||
// Target word has been removed.
|
||||
continue;
|
||||
}
|
||||
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
|
||||
const BigramEntry updatedBigramEntry =
|
||||
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
|
||||
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
|
||||
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
*outEntrycount += 1;
|
||||
}
|
||||
if (lastEntryPos != NOT_A_DICT_POS) {
|
||||
// Update has next flag in the last written entry.
|
||||
const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
|
||||
false /* hasNext */);
|
||||
if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
|
||||
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class BigramDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
|
||||
const bool isUpdatable)
|
||||
: SparseTableDictContent(dictPath,
|
||||
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||
|
||||
BigramDictContent(const bool hasHistoricalInfo)
|
||||
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
|
||||
mHasHistoricalInfo(hasHistoricalInfo) {}
|
||||
|
||||
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
|
||||
int readingPos = bigramEntryPos;
|
||||
return getBigramEntryAndAdvancePosition(&readingPos);
|
||||
}
|
||||
|
||||
const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
|
||||
|
||||
// Returns head position of bigram list for a PtNode specified by terminalId.
|
||||
int getBigramListHeadPos(const int terminalId) const {
|
||||
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||
if (!addressLookupTable->contains(terminalId)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return addressLookupTable->get(terminalId);
|
||||
}
|
||||
|
||||
bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
|
||||
int writingPos = getContentBuffer()->getTailPosition();
|
||||
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
|
||||
}
|
||||
|
||||
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
|
||||
int writingPos = entryWritingPos;
|
||||
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
|
||||
}
|
||||
|
||||
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
|
||||
int *const entryWritingPos);
|
||||
|
||||
bool createNewBigramList(const int terminalId) {
|
||||
const int bigramListPos = getContentBuffer()->getTailPosition();
|
||||
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
|
||||
}
|
||||
|
||||
bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
|
||||
|
||||
bool flushToFile(const char *const dictPath) const {
|
||||
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::BIGRAM_FILE_EXTENSION);
|
||||
}
|
||||
|
||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const BigramDictContent *const originalBigramDictContent,
|
||||
int *const outBigramEntryCount);
|
||||
|
||||
bool isContentTailPos(const int pos) const {
|
||||
return pos == getContentBuffer()->getTailPosition();
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
|
||||
|
||||
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
|
||||
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
|
||||
| (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
|
||||
}
|
||||
|
||||
bool runGCBigramList(const int bigramListPos,
|
||||
const BigramDictContent *const sourceBigramDictContent, const int toPos,
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
int *const outEntryCount);
|
||||
|
||||
bool mHasHistoricalInfo;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H */
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
|
||||
#define LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class BigramEntry {
|
||||
public:
|
||||
BigramEntry(const BigramEntry& bigramEntry)
|
||||
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
|
||||
mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
|
||||
|
||||
// Entry with historical information.
|
||||
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
|
||||
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
|
||||
mTargetTerminalId(targetTerminalId) {}
|
||||
|
||||
// Entry with historical information.
|
||||
BigramEntry(const bool hasNext, const int probability,
|
||||
const HistoricalInfo *const historicalInfo, const int targetTerminalId)
|
||||
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
|
||||
mTargetTerminalId(targetTerminalId) {}
|
||||
|
||||
const BigramEntry getInvalidatedEntry() const {
|
||||
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
|
||||
}
|
||||
|
||||
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
|
||||
return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
|
||||
}
|
||||
|
||||
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
|
||||
return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
|
||||
}
|
||||
|
||||
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
|
||||
return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
|
||||
}
|
||||
|
||||
const BigramEntry updateHistoricalInfoAndGetEntry(
|
||||
const HistoricalInfo *const historicalInfo) const {
|
||||
return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
|
||||
}
|
||||
|
||||
bool isValid() const {
|
||||
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
}
|
||||
|
||||
bool hasNext() const {
|
||||
return mHasNext;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
bool hasHistoricalInfo() const {
|
||||
return mHistoricalInfo.isValid();
|
||||
}
|
||||
|
||||
const HistoricalInfo *getHistoricalInfo() const {
|
||||
return &mHistoricalInfo;
|
||||
}
|
||||
|
||||
int getTargetTerminalId() const {
|
||||
return mTargetTerminalId;
|
||||
}
|
||||
|
||||
private:
|
||||
// Copy constructor is public to use this class as a type of return value.
|
||||
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
|
||||
|
||||
const bool mHasNext;
|
||||
const int mProbability;
|
||||
const HistoricalInfo mHistoricalInfo;
|
||||
const int mTargetTerminalId;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H */
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class DictContent {
|
||||
public:
|
||||
virtual ~DictContent() {}
|
||||
virtual bool isValid() const = 0;
|
||||
|
||||
protected:
|
||||
DictContent() {}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DictContent);
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_DICT_CONTENT_H */
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Reads the probability entry stored for terminalId.
// For an id outside [0, mSize) an entry with flags 0 and NOT_A_PROBABILITY is returned.
// Otherwise flags and probability are read from the buffer, followed by timestamp, level
// and count (in that order) when this content has historical information.
const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
    const bool isKnownTerminalId = (terminalId >= 0) && (terminalId < mSize);
    if (!isKnownTerminalId) {
        // This method can be called with invalid terminal id during GC.
        return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
    }
    const BufferWithExtendableBuffer *const contentBuffer = getBuffer();
    int readingPos = getEntryPos(terminalId);
    const int flags = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &readingPos);
    const int probability = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::PROBABILITY_SIZE, &readingPos);
    if (!mHasHistoricalInfo) {
        return ProbabilityEntry(flags, probability);
    }
    const int timestamp = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &readingPos);
    const int level = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &readingPos);
    const int count = contentBuffer->readUintAndAdvancePosition(
            Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &readingPos);
    const HistoricalInfo historicalInfo(timestamp, level, count);
    return ProbabilityEntry(flags, probability, &historicalInfo);
}
|
||||
|
||||
bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
|
||||
const ProbabilityEntry *const probabilityEntry) {
|
||||
if (terminalId < 0) {
|
||||
return false;
|
||||
}
|
||||
const int entryPos = getEntryPos(terminalId);
|
||||
if (terminalId >= mSize) {
|
||||
ProbabilityEntry dummyEntry;
|
||||
// Write new entry.
|
||||
int writingPos = getBuffer()->getTailPosition();
|
||||
while (writingPos <= entryPos) {
|
||||
// Fulfilling with dummy entries until writingPos.
|
||||
if (!writeEntry(&dummyEntry, writingPos)) {
|
||||
AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
|
||||
return false;
|
||||
}
|
||||
writingPos += getEntrySize();
|
||||
mSize++;
|
||||
}
|
||||
}
|
||||
return writeEntry(probabilityEntry, entryPos);
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
|
||||
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
|
||||
for (int i = 0; i < mSize; ++i) {
|
||||
const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
|
||||
if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
|
||||
AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return probabilityDictContentToWrite.flush(dictPath,
|
||||
Ver4DictConstants::FREQ_FILE_EXTENSION);
|
||||
} else {
|
||||
return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::runGC(
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const ProbabilityDictContent *const originalProbabilityDictContent) {
|
||||
mSize = 0;
|
||||
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
|
||||
it != terminalIdMap->end(); ++it) {
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
originalProbabilityDictContent->getProbabilityEntry(it->first);
|
||||
if (!setProbabilityEntry(it->second, &probabilityEntry)) {
|
||||
AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
|
||||
return false;
|
||||
}
|
||||
mSize++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int ProbabilityDictContent::getEntrySize() const {
|
||||
if (mHasHistoricalInfo) {
|
||||
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||
+ Ver4DictConstants::PROBABILITY_SIZE
|
||||
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
|
||||
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
|
||||
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
|
||||
} else {
|
||||
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||
+ Ver4DictConstants::PROBABILITY_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
int ProbabilityDictContent::getEntryPos(const int terminalId) const {
|
||||
return terminalId * getEntrySize();
|
||||
}
|
||||
|
||||
bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
|
||||
const int entryPos) {
|
||||
BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
|
||||
int writingPos = entryPos;
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
|
||||
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
|
||||
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (mHasHistoricalInfo) {
|
||||
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
|
||||
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
|
||||
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
|
||||
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
|
||||
AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class ProbabilityEntry;
|
||||
|
||||
class ProbabilityDictContent : public SingleDictContent {
|
||||
public:
|
||||
ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
|
||||
const bool isUpdatable)
|
||||
: SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
|
||||
mHasHistoricalInfo(hasHistoricalInfo),
|
||||
mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
|
||||
|
||||
ProbabilityDictContent(const bool hasHistoricalInfo)
|
||||
: mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
|
||||
|
||||
const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
|
||||
|
||||
bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
|
||||
|
||||
bool flushToFile(const char *const dictPath) const;
|
||||
|
||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const ProbabilityDictContent *const originalProbabilityDictContent);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
|
||||
|
||||
int getEntrySize() const;
|
||||
|
||||
int getEntryPos(const int terminalId) const;
|
||||
|
||||
bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
|
||||
|
||||
bool mHasHistoricalInfo;
|
||||
int mSize;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H */
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
|
||||
#define LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class ProbabilityEntry {
|
||||
public:
|
||||
ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
|
||||
: mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
|
||||
mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
|
||||
|
||||
// Dummy entry
|
||||
ProbabilityEntry()
|
||||
: mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
|
||||
|
||||
// Entry without historical information
|
||||
ProbabilityEntry(const int flags, const int probability)
|
||||
: mFlags(flags), mProbability(probability), mHistoricalInfo() {}
|
||||
|
||||
// Entry with historical information.
|
||||
ProbabilityEntry(const int flags, const int probability,
|
||||
const HistoricalInfo *const historicalInfo)
|
||||
: mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
|
||||
|
||||
const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
|
||||
return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
|
||||
}
|
||||
|
||||
const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
|
||||
const HistoricalInfo *const historicalInfo) const {
|
||||
return ProbabilityEntry(mFlags, mProbability, historicalInfo);
|
||||
}
|
||||
|
||||
bool hasHistoricalInfo() const {
|
||||
return mHistoricalInfo.isValid();
|
||||
}
|
||||
|
||||
int getFlags() const {
|
||||
return mFlags;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
const HistoricalInfo *getHistoricalInfo() const {
|
||||
return &mHistoricalInfo;
|
||||
}
|
||||
|
||||
private:
|
||||
// Copy constructor is public to use this class as a type of return value.
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
|
||||
|
||||
const int mFlags;
|
||||
const int mProbability;
|
||||
const HistoricalInfo mHistoricalInfo;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H */
|
|
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
||||
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
|
||||
bool *const outhasNext, int *const shortcutEntryPos) const {
|
||||
const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
|
||||
if (*shortcutEntryPos < 0 || *shortcutEntryPos >= shortcutListBuffer->getTailPosition()) {
|
||||
AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
|
||||
*shortcutEntryPos, shortcutListBuffer->getTailPosition());
|
||||
ASSERT(false);
|
||||
if (outhasNext) {
|
||||
*outhasNext = false;
|
||||
}
|
||||
if (outCodePointCount) {
|
||||
*outCodePointCount = 0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
|
||||
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
|
||||
if (outProbability) {
|
||||
*outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
|
||||
}
|
||||
if (outhasNext) {
|
||||
*outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
|
||||
}
|
||||
if (outCodePoint && outCodePointCount) {
|
||||
shortcutListBuffer->readCodePointsAndAdvancePosition(
|
||||
maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
|
||||
}
|
||||
}
|
||||
|
||||
int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
|
||||
const SparseTable *const addressLookupTable = getAddressLookupTable();
|
||||
if (!addressLookupTable->contains(terminalId)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return addressLookupTable->get(terminalId);
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
|
||||
return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::runGC(
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const ShortcutDictContent *const originalShortcutDictContent) {
|
||||
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
|
||||
it != terminalIdMap->end(); ++it) {
|
||||
const int originalShortcutListPos =
|
||||
originalShortcutDictContent->getShortcutListHeadPos(it->first);
|
||||
if (originalShortcutListPos == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
const int shortcutListPos = getContentBuffer()->getTailPosition();
|
||||
// Copy shortcut list from original content.
|
||||
if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent,
|
||||
shortcutListPos)) {
|
||||
AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
|
||||
originalShortcutListPos, shortcutListPos);
|
||||
return false;
|
||||
}
|
||||
// Set shortcut list position to the lookup table.
|
||||
if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) {
|
||||
AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
|
||||
it->second, shortcutListPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
|
||||
const int shortcutListListPos = getContentBuffer()->getTailPosition();
|
||||
return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos);
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
|
||||
return copyShortcutListFromDictContent(shortcutListPos, this, toPos);
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
|
||||
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
|
||||
bool hasNext = true;
|
||||
int readingPos = shortcutListPos;
|
||||
int writingPos = toPos;
|
||||
int codePoints[MAX_WORD_LENGTH];
|
||||
while (hasNext) {
|
||||
int probability = 0;
|
||||
int codePointCount = 0;
|
||||
sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
|
||||
codePoints, &codePointCount, &probability, &hasNext, &readingPos);
|
||||
if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability,
|
||||
hasNext, &writingPos)) {
|
||||
AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
|
||||
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
|
||||
const int shortcutFlags = shortcutListBuffer->readUint(
|
||||
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
|
||||
const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
|
||||
const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext);
|
||||
return shortcutListBuffer->writeUint(shortcutFlagsToWrite,
|
||||
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
|
||||
}
|
||||
|
||||
bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
|
||||
const int codePointCount, const int probability, const bool hasNext,
|
||||
int *const shortcutEntryPos) {
|
||||
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
|
||||
const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext);
|
||||
if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags,
|
||||
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
|
||||
AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos);
|
||||
return false;
|
||||
}
|
||||
if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,
|
||||
true /* writesTerminator */, shortcutEntryPos)) {
|
||||
AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Find a shortcut entry that has specified target and return its position.
|
||||
int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
|
||||
const int *const targetCodePointsToFind, const int codePointCount) const {
|
||||
bool hasNext = true;
|
||||
int readingPos = shortcutListPos;
|
||||
int targetCodePoints[MAX_WORD_LENGTH];
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
int probability = 0;
|
||||
int targetCodePointCount = 0;
|
||||
getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount,
|
||||
&probability, &hasNext, &readingPos);
|
||||
if (targetCodePointCount != codePointCount) {
|
||||
continue;
|
||||
}
|
||||
bool matched = true;
|
||||
for (int i = 0; i < codePointCount; ++i) {
|
||||
if (targetCodePointsToFind[i] != targetCodePoints[i]) {
|
||||
matched = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (matched) {
|
||||
return entryPos;
|
||||
}
|
||||
}
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
|
||||
int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
|
||||
const bool hasNext) const {
|
||||
return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK)
|
||||
| (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0);
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class ShortcutDictContent : public SparseTableDictContent {
|
||||
public:
|
||||
ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
|
||||
: SparseTableDictContent(dictPath,
|
||||
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
|
||||
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
|
||||
ShortcutDictContent()
|
||||
: SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
|
||||
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
|
||||
|
||||
void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
|
||||
int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
|
||||
const int shortcutEntryPos) {
|
||||
int readingPos = shortcutEntryPos;
|
||||
return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
|
||||
outCodePointCount, outProbability, outhasNext, &readingPos);
|
||||
}
|
||||
|
||||
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
|
||||
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
|
||||
bool *const outhasNext, int *const shortcutEntryPos) const;
|
||||
|
||||
// Returns head position of shortcut list for a PtNode specified by terminalId.
|
||||
int getShortcutListHeadPos(const int terminalId) const;
|
||||
|
||||
bool flushToFile(const char *const dictPath) const;
|
||||
|
||||
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
|
||||
const ShortcutDictContent *const originalShortcutDictContent);
|
||||
|
||||
bool createNewShortcutList(const int terminalId);
|
||||
|
||||
bool copyShortcutList(const int shortcutListPos, const int toPos);
|
||||
|
||||
bool setProbability(const int probability, const int shortcutEntryPos);
|
||||
|
||||
bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
|
||||
const int probability, const bool hasNext, const int shortcutEntryPos) {
|
||||
int writingPos = shortcutEntryPos;
|
||||
return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
|
||||
hasNext, &writingPos);
|
||||
}
|
||||
|
||||
bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
|
||||
const int codePointCount, const int probability, const bool hasNext,
|
||||
int *const shortcutEntryPos);
|
||||
|
||||
int findShortcutEntryAndGetPos(const int shortcutListPos,
|
||||
const int *const targetCodePointsToFind, const int codePointCount) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
|
||||
|
||||
bool copyShortcutListFromDictContent(const int shortcutListPos,
|
||||
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
|
||||
|
||||
int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H */
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class SingleDictContent : public DictContent {
|
||||
public:
|
||||
SingleDictContent(const char *const dictPath, const char *const contentFileName,
|
||||
const bool isUpdatable)
|
||||
: mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
|
||||
mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
|
||||
mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mIsValid(mMmappedBuffer) {}
|
||||
|
||||
SingleDictContent()
|
||||
: mMmappedBuffer(nullptr),
|
||||
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
|
||||
|
||||
virtual ~SingleDictContent() {}
|
||||
|
||||
virtual bool isValid() const {
|
||||
return mIsValid;
|
||||
}
|
||||
|
||||
bool isNearSizeLimit() const {
|
||||
return mExpandableContentBuffer.isNearSizeLimit();
|
||||
}
|
||||
|
||||
protected:
|
||||
BufferWithExtendableBuffer *getWritableBuffer() {
|
||||
return &mExpandableContentBuffer;
|
||||
}
|
||||
|
||||
const BufferWithExtendableBuffer *getBuffer() const {
|
||||
return &mExpandableContentBuffer;
|
||||
}
|
||||
|
||||
bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
|
||||
return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
|
||||
contentFileNameSuffix, &mExpandableContentBuffer);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||
const bool mIsValid;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H */
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
bool SparseTableDictContent::flush(const char *const dictPath,
|
||||
const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
|
||||
const char *const contentFileNameSuffix) const {
|
||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix,
|
||||
&mExpandableLookupTableBuffer)){
|
||||
return false;
|
||||
}
|
||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix,
|
||||
&mExpandableAddressTableBuffer)) {
|
||||
return false;
|
||||
}
|
||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix,
|
||||
&mExpandableContentBuffer)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
|
||||
#define LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// TODO: Support multiple contents.
|
||||
class SparseTableDictContent : public DictContent {
|
||||
public:
|
||||
AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
|
||||
const char *const lookupTableFileName, const char *const addressTableFileName,
|
||||
const char *const contentFileName, const bool isUpdatable,
|
||||
const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||
: mLookupTableBuffer(
|
||||
MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
|
||||
mAddressTableBuffer(
|
||||
MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
|
||||
mContentBuffer(
|
||||
MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
|
||||
mExpandableLookupTableBuffer(
|
||||
mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
|
||||
mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mExpandableAddressTableBuffer(
|
||||
mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
|
||||
mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
|
||||
mContentBuffer ? mContentBuffer->getBufferSize() : 0,
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||
sparseTableBlockSize, sparseTableDataSize),
|
||||
mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
|
||||
|
||||
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
|
||||
: mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
|
||||
mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
|
||||
sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
|
||||
|
||||
virtual ~SparseTableDictContent() {}
|
||||
|
||||
virtual bool isValid() const {
|
||||
return mIsValid;
|
||||
}
|
||||
|
||||
bool isNearSizeLimit() const {
|
||||
return mExpandableLookupTableBuffer.isNearSizeLimit()
|
||||
|| mExpandableAddressTableBuffer.isNearSizeLimit()
|
||||
|| mExpandableContentBuffer.isNearSizeLimit();
|
||||
}
|
||||
|
||||
protected:
|
||||
SparseTable *getUpdatableAddressLookupTable() {
|
||||
return &mAddressLookupTable;
|
||||
}
|
||||
|
||||
const SparseTable *getAddressLookupTable() const {
|
||||
return &mAddressLookupTable;
|
||||
}
|
||||
|
||||
BufferWithExtendableBuffer *getWritableContentBuffer() {
|
||||
return &mExpandableContentBuffer;
|
||||
}
|
||||
|
||||
const BufferWithExtendableBuffer *getContentBuffer() const {
|
||||
return &mExpandableContentBuffer;
|
||||
}
|
||||
|
||||
bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
|
||||
const char *const addressTableFileName, const char *const contentFileName) const;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
|
||||
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
|
||||
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
|
||||
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
|
||||
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
|
||||
BufferWithExtendableBuffer mExpandableContentBuffer;
|
||||
SparseTable mAddressLookupTable;
|
||||
const bool mIsValid;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H */
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
|
||||
if (terminalId < 0 || terminalId >= mSize) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const int terminalPos = getBuffer()->readUint(
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
||||
return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
|
||||
NOT_A_DICT_POS : terminalPos;
|
||||
}
|
||||
|
||||
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
|
||||
const int terminalId, const int terminalPtNodePos) {
|
||||
if (terminalId < 0) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
while (terminalId >= mSize) {
|
||||
// Write new entry.
|
||||
if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
|
||||
return false;
|
||||
}
|
||||
mSize++;
|
||||
}
|
||||
const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
|
||||
terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
|
||||
return getWritableBuffer()->writeUint(terminalPos,
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
|
||||
}
|
||||
|
||||
bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
|
||||
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
|
||||
// table and write the new table to the file.
|
||||
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
|
||||
TerminalPositionLookupTable lookupTableToWrite;
|
||||
for (int i = 0; i < mSize; ++i) {
|
||||
const int terminalPtNodePosition = getTerminalPtNodePosition(i);
|
||||
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
|
||||
AKLOGE("Cannot set terminal position to lookupTableToWrite."
|
||||
" terminalId: %d, position: %d", i, terminalPtNodePosition);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return lookupTableToWrite.flush(dictPath,
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
} else {
|
||||
// We can simply use this lookup table because the buffer size has not been
|
||||
// changed.
|
||||
return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
// Compacts the table in place by dropping every entry whose stored value is
// NOT_A_TERMINAL_ADDRESS and renumbering the remaining entries from 0.
//
// terminalIdMap: receives one (old terminal id -> new terminal id) pair per kept entry.
//
// Returns false when a buffer write fails; otherwise updates mSize to the number of kept
// entries and returns true.
bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
    int nextNewTerminalId = 0;
    for (int i = 0; i < mSize; ++i) {
        const int terminalPos = getBuffer()->readUint(
                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
        if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
            // This entry is garbage; it gets no new id and is overwritten by later entries.
            continue;
        }
        // Give a new terminal id to the entry. nextNewTerminalId never exceeds i, so the write
        // only touches slots that have already been read.
        if (!getWritableBuffer()->writeUint(terminalPos,
                Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
                getEntryPos(nextNewTerminalId))) {
            return false;
        }
        // Memorize the mapping from the old terminal id to the new terminal id.
        terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
        nextNewTerminalId++;
    }
    mSize = nextNewTerminalId;
    return true;
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||
#define LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class TerminalPositionLookupTable : public SingleDictContent {
|
||||
public:
|
||||
typedef std::unordered_map<int, int> TerminalIdMap;
|
||||
|
||||
TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
|
||||
: SingleDictContent(dictPath,
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
|
||||
mSize(getBuffer()->getTailPosition()
|
||||
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
|
||||
|
||||
TerminalPositionLookupTable() : mSize(0) {}
|
||||
|
||||
int getTerminalPtNodePosition(const int terminalId) const;
|
||||
|
||||
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
|
||||
|
||||
int getNextTerminalId() const {
|
||||
return mSize;
|
||||
}
|
||||
|
||||
bool flushToFile(const char *const dictPath) const;
|
||||
|
||||
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
|
||||
|
||||
int getEntryPos(const int terminalId) const {
|
||||
return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
||||
}
|
||||
|
||||
int mSize;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
|
||||
* Do not edit this file other than updating policy's interface.
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
||||
public:
|
||||
Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
|
||||
const TerminalPositionLookupTable *const terminalPositionLookupTable)
|
||||
: mShortcutDictContent(shortcutDictContent) {}
|
||||
|
||||
~Ver4ShortcutListPolicy() {}
|
||||
|
||||
int getStartPos(const int pos) const {
|
||||
// The first shortcut entry is located at the head position of the shortcut list.
|
||||
return pos;
|
||||
}
|
||||
|
||||
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
||||
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
||||
int *const pos) const {
|
||||
int probability = 0;
|
||||
mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
|
||||
outCodePoint, outCodePointCount, &probability, outHasNext, pos);
|
||||
if (outIsWhitelist) {
|
||||
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
|
||||
}
|
||||
}
|
||||
|
||||
void skipAllShortcuts(int *const pos) const {
|
||||
// Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
|
||||
}
|
||||
|
||||
bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
|
||||
const int probability) {
|
||||
const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
|
||||
if (shortcutListPos == NOT_A_DICT_POS) {
|
||||
// Create shortcut list.
|
||||
if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
|
||||
AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
|
||||
return false;
|
||||
}
|
||||
const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
|
||||
return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
|
||||
false /* hasNext */, writingPos);
|
||||
}
|
||||
const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
|
||||
codePoints, codePointCount);
|
||||
if (entryPos == NOT_A_DICT_POS) {
|
||||
// Add new entry to the shortcut list.
|
||||
// Create new shortcut list.
|
||||
if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
|
||||
AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
|
||||
return false;
|
||||
}
|
||||
int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
|
||||
if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
|
||||
codePointCount, probability, true /* hasNext */, &writingPos)) {
|
||||
AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
|
||||
writingPos);
|
||||
return false;
|
||||
}
|
||||
return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
|
||||
}
|
||||
// Overwrite existing entry.
|
||||
bool hasNext = false;
|
||||
mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
|
||||
0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos);
|
||||
if (!mShortcutDictContent->writeShortcutEntry(codePoints,
|
||||
codePointCount, probability, hasNext, entryPos)) {
|
||||
AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
|
||||
entryPos);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
|
||||
|
||||
ShortcutDictContent *const mShortcutDictContent;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
|
|
@ -0,0 +1,155 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Creates the buffers of the dictionary at dictPath, taking ownership of headerBuffer.
// Returns a null Ver4DictBuffersPtr when headerBuffer is null.
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
        const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
        const FormatUtils::FORMAT_VERSION formatVersion) {
    if (headerBuffer) {
        // TODO: take only dictDirPath, and open both header and trie files in the constructor
        // below
        // The flag must be read before headerBuffer is moved into the new object.
        const bool isUpdatable = headerBuffer->isUpdatable();
        return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer),
                isUpdatable, formatVersion));
    }
    ASSERT(false);
    AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
    return Ver4DictBuffersPtr(nullptr);
}
|
||||
|
||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
const BufferWithExtendableBuffer *const headerBuffer) const {
|
||||
// Create temporary directory.
|
||||
const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
|
||||
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
|
||||
char tmpDirPath[tmpDirPathBufSize];
|
||||
FileUtils::getFilePathWithSuffix(dictDirPath,
|
||||
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
|
||||
tmpDirPath);
|
||||
if (FileUtils::existsDir(tmpDirPath)) {
|
||||
if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
|
||||
AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (mkdir(tmpDirPath, S_IRWXU) == -1) {
|
||||
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
|
||||
return false;
|
||||
}
|
||||
// Get dictionary base path.
|
||||
const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
|
||||
char dictName[dictNameBufSize];
|
||||
FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
|
||||
const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
|
||||
char dictPath[dictPathBufSize];
|
||||
FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
|
||||
|
||||
// Write header file.
|
||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
|
||||
Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
|
||||
AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
|
||||
Ver4DictConstants::HEADER_FILE_EXTENSION);
|
||||
return false;
|
||||
}
|
||||
// Write trie file.
|
||||
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
|
||||
Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
|
||||
AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath,
|
||||
Ver4DictConstants::TRIE_FILE_EXTENSION);
|
||||
return false;
|
||||
}
|
||||
// Write dictionary contents.
|
||||
if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
|
||||
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
||||
return false;
|
||||
}
|
||||
if (!mProbabilityDictContent.flushToFile(dictPath)) {
|
||||
AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
|
||||
return false;
|
||||
}
|
||||
if (!mBigramDictContent.flushToFile(dictPath)) {
|
||||
AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
|
||||
return false;
|
||||
}
|
||||
if (!mShortcutDictContent.flushToFile(dictPath)) {
|
||||
AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
|
||||
return false;
|
||||
}
|
||||
// Remove existing dictionary.
|
||||
if (!FileUtils::removeDirAndFiles(dictDirPath)) {
|
||||
AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
// Rename temporary directory.
|
||||
if (rename(tmpDirPath, dictDirPath) != 0) {
|
||||
AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds the buffers for a dictionary that already exists on disk.
//
// Ownership of the already-opened header buffer is moved into this object; the trie file
// (dictPath + Ver4DictConstants::TRIE_FILE_EXTENSION) is opened here, and every content object
// is constructed from the same dictPath.
//
// NOTE(review): mHeaderBuffer is dereferenced unconditionally to construct mHeaderPolicy, while
// the very next initializer null-checks it. Callers presumably guarantee a non-null header
// buffer -- confirm.
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
        MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
        const FormatUtils::FORMAT_VERSION formatVersion)
        : mHeaderBuffer(std::move(headerBuffer)),
          mDictBuffer(MmappedBuffer::openBuffer(
                  dictPath, Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
          mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
          // Expandable view over the header region.
          mExpandableHeaderBuffer(
                  !mHeaderBuffer ? nullptr : mHeaderBuffer->getBuffer(),
                  mHeaderPolicy.getSize(),
                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
          // Expandable view over the trie; empty when the trie file could not be opened.
          mExpandableTrieBuffer(
                  !mDictBuffer ? nullptr : mDictBuffer->getBuffer(),
                  !mDictBuffer ? 0 : mDictBuffer->getBufferSize(),
                  BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
          mTerminalPositionLookupTable(dictPath, isUpdatable),
          mProbabilityDictContent(
                  dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
          mBigramDictContent(
                  dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
          mShortcutDictContent(dictPath, isUpdatable),
          mIsUpdatable(isUpdatable) {}
|
||||
|
||||
// Builds the buffers for a brand-new, always-updatable dictionary that has no backing files.
//
// No mmapped header or trie buffer is held (both are nullptr); the header policy is copied from
// headerPolicy and the trie buffer is created with maxTrieSize as its size argument.
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
        : mHeaderBuffer(nullptr),
          mDictBuffer(nullptr),
          mHeaderPolicy(headerPolicy),
          mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
          mExpandableTrieBuffer(maxTrieSize),
          mTerminalPositionLookupTable(),
          mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
          mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()),
          mShortcutDictContent(),
          mIsUpdatable(true) {}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class Ver4DictBuffers {
|
||||
public:
|
||||
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
|
||||
|
||||
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
|
||||
MmappedBuffer::MmappedBufferPtr headerBuffer,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||
|
||||
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
|
||||
const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
|
||||
return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isValid() const {
|
||||
return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
|
||||
&& mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
|
||||
&& mBigramDictContent.isValid() && mShortcutDictContent.isValid();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isNearSizeLimit() const {
|
||||
return mExpandableTrieBuffer.isNearSizeLimit()
|
||||
|| mTerminalPositionLookupTable.isNearSizeLimit()
|
||||
|| mProbabilityDictContent.isNearSizeLimit()
|
||||
|| mBigramDictContent.isNearSizeLimit()
|
||||
|| mShortcutDictContent.isNearSizeLimit();
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
|
||||
return &mHeaderPolicy;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
|
||||
return &mExpandableHeaderBuffer;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
|
||||
return &mExpandableTrieBuffer;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
|
||||
return &mExpandableTrieBuffer;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
|
||||
return &mTerminalPositionLookupTable;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
|
||||
return &mTerminalPositionLookupTable;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
|
||||
return &mProbabilityDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
|
||||
return &mProbabilityDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
|
||||
return &mBigramDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
|
||||
return &mBigramDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
|
||||
return &mShortcutDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
|
||||
return &mShortcutDictContent;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE bool isUpdatable() const {
|
||||
return mIsUpdatable;
|
||||
}
|
||||
|
||||
bool flush(const char *const dictDirPath) const {
|
||||
return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
|
||||
}
|
||||
|
||||
bool flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
const BufferWithExtendableBuffer *const headerBuffer) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||
|
||||
Ver4DictBuffers(const char *const dictDirPath,
|
||||
const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
|
||||
const FormatUtils::FORMAT_VERSION formatVersion);
|
||||
|
||||
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
|
||||
|
||||
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
|
||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||
const HeaderPolicy mHeaderPolicy;
|
||||
BufferWithExtendableBuffer mExpandableHeaderBuffer;
|
||||
BufferWithExtendableBuffer mExpandableTrieBuffer;
|
||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||
ProbabilityDictContent mProbabilityDictContent;
|
||||
BigramDictContent mBigramDictContent;
|
||||
ShortcutDictContent mShortcutDictContent;
|
||||
const int mIsUpdatable;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H */
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// These values MUST match the definitions in FormatSpec.java.
|
||||
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
|
||||
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
|
||||
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
|
||||
// tat = Terminal Address Table
|
||||
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
|
||||
const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
|
||||
const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
|
||||
const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
|
||||
const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
|
||||
const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
|
||||
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
|
||||
".shortcut_index_shortcut";
|
||||
|
||||
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
|
||||
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
|
||||
// Extended region size, which is not GCed region size in dict file + additional buffer size, is
|
||||
// limited to 1MB to prevent from inefficient traversing.
|
||||
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
|
||||
|
||||
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
|
||||
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
|
||||
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
|
||||
const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
|
||||
const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
|
||||
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
|
||||
|
||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
|
||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
|
||||
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||
|
||||
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
|
||||
// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
|
||||
// invalid terminal ID in bigram lists.
|
||||
const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
|
||||
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
|
||||
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
|
||||
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
|
||||
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
|
||||
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
|
||||
|
||||
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
|
||||
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
|
||||
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// TODO: Create PtConstants under the pt_common and move some constant values there.
|
||||
// Note that there are corresponding definitions in FormatSpec.java.
|
||||
class Ver4DictConstants {
|
||||
public:
|
||||
static const char *const TRIE_FILE_EXTENSION;
|
||||
static const char *const HEADER_FILE_EXTENSION;
|
||||
static const char *const FREQ_FILE_EXTENSION;
|
||||
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
|
||||
static const char *const BIGRAM_FILE_EXTENSION;
|
||||
static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
|
||||
static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
|
||||
static const char *const SHORTCUT_FILE_EXTENSION;
|
||||
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
|
||||
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
|
||||
|
||||
static const int MAX_DICTIONARY_SIZE;
|
||||
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||
|
||||
static const int NOT_A_TERMINAL_ID;
|
||||
static const int PROBABILITY_SIZE;
|
||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
||||
static const int NOT_A_TERMINAL_ADDRESS;
|
||||
static const int TERMINAL_ID_FIELD_SIZE;
|
||||
static const int TIME_STAMP_FIELD_SIZE;
|
||||
static const int WORD_LEVEL_FIELD_SIZE;
|
||||
static const int WORD_COUNT_FIELD_SIZE;
|
||||
|
||||
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
|
||||
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
||||
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
|
||||
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
|
||||
|
||||
static const int BIGRAM_FLAGS_FIELD_SIZE;
|
||||
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
|
||||
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
|
||||
static const int BIGRAM_PROBABILITY_MASK;
|
||||
static const int BIGRAM_HAS_NEXT_MASK;
|
||||
// Used when bigram list has time stamp.
|
||||
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
|
||||
|
||||
static const int SHORTCUT_FLAGS_FIELD_SIZE;
|
||||
static const int SHORTCUT_PROBABILITY_MASK;
|
||||
static const int SHORTCUT_HAS_NEXT_MASK;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H */
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Reads the PtNode stored at ptNodePos and returns its parameters.
//
// ptNodePos      : position of the PtNode head.
// siblingNodePos : sibling position to report, or NOT_A_DICT_POS to use the tail position of
//                  the PtNode read by this call.
//
// Returns a default-constructed PtNodeParams when ptNodePos lies outside the buffer. When the
// PtNode is flagged as moved, the destination PtNode is read instead (recursively), keeping
// the sibling position determined for the original PtNode.
const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
        const int ptNodePos, const int siblingNodePos) const {
    if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
        // Reading invalid position because of bug or broken dictionary.
        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
                ptNodePos, mBuffer->getTailPosition());
        ASSERT(false);
        return PtNodeParams();
    }
    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
    const uint8_t *const nodeBuf = mBuffer->getBuffer(inAdditionalBuffer);
    // Difference between a position inside nodeBuf and the position reported to callers; zero
    // unless the PtNode lives in the additional buffer.
    const int posOffset = inAdditionalBuffer ? mBuffer->getOriginalBufferSize() : 0;
    int readingPos = ptNodePos - posOffset;

    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuf, &readingPos);
    const int parentPosOffset =
            DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
                    nodeBuf, &readingPos);
    const int parentPos = DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, ptNodePos);

    int codePoints[MAX_WORD_LENGTH];
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            nodeBuf, flags, MAX_WORD_LENGTH, codePoints, &readingPos);

    // Terminal attributes keep their "not available" values for non-terminal PtNodes.
    int terminalIdFieldPos = NOT_A_DICT_POS;
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    int probability = NOT_A_PROBABILITY;
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        terminalIdFieldPos = readingPos + posOffset;
        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
                nodeBuf, &readingPos);
        const ProbabilityEntry entry = mProbabilityDictContent->getProbabilityEntry(terminalId);
        if (entry.hasHistoricalInfo()) {
            probability = ForgettingCurveUtils::decodeProbability(
                    entry.getHistoricalInfo(), mHeaderPolicy);
        } else {
            probability = entry.getProbability();
        }
    }

    const int childrenPosFieldPos = readingPos + posOffset;
    int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
            nodeBuf, &readingPos);
    if (childrenPos != NOT_A_DICT_POS) {
        childrenPos += posOffset;
    }

    // Sibling position is the tail position of original PtNode.
    const int tailPos = readingPos + posOffset;
    const int newSiblingNodePos = (siblingNodePos != NOT_A_DICT_POS) ? siblingNodePos : tailPos;

    if (DynamicPtReadingUtils::isMoved(flags)) {
        // The destination position is stored at the same place as the parent position, so
        // read the destination PtNode instead.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
    }
    return PtNodeParams(ptNodePos, flags, parentPos, codePointCount, codePoints,
            terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
            newSiblingNodePos);
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class BufferWithExtendableBuffer;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class HeaderPolicy;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
class ProbabilityDictContent;
|
||||
|
||||
/*
|
||||
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
|
||||
* node and reads node attributes including probability from probabilityBuffer.
|
||||
*/
|
||||
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
|
||||
public:
|
||||
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
||||
const ProbabilityDictContent *const probabilityDictContent,
|
||||
const HeaderPolicy *const headerPolicy)
|
||||
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
|
||||
mHeaderPolicy(headerPolicy) {}
|
||||
|
||||
~Ver4PatriciaTrieNodeReader() {}
|
||||
|
||||
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
|
||||
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
|
||||
NOT_A_DICT_POS /* siblingNodePos */);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
|
||||
|
||||
const BufferWithExtendableBuffer *const mBuffer;
|
||||
const ProbabilityDictContent *const mProbabilityDictContent;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
|
||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
||||
const int siblingNodePos) const;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H */
|
|
@ -0,0 +1,429 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
|
||||
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Size of the children position field of a PtNode (presumably in bytes, like the other
// *_FIELD_SIZE constants of this format -- confirm).
const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
|
||||
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mTrieBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Read original flags
|
||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||
true /* isDeleted */, false /* willBecomeNonTerminal */);
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
// Update flags.
|
||||
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
// The PtNode is a terminal. Delete entry from the terminal position lookup table.
|
||||
return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int movedPos, const int bigramLinkedNodePos) {
|
||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
|
||||
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mTrieBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Read original flags
|
||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||
false /* isDeleted */, false /* willBecomeNonTerminal */);
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
// Update flags.
|
||||
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
|
||||
&writingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Update moved position, which is stored in the parent offset field.
|
||||
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||
mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
||||
// Update children's parent position.
|
||||
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
||||
while (!mReadingHelper.isEnd()) {
|
||||
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
|
||||
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
||||
+ DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||
mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
||||
&parentOffsetFieldPos)) {
|
||||
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||
// we give up to update dictionary.
|
||||
return false;
|
||||
}
|
||||
mReadingHelper.readNextSiblingNode(childPtNodeParams);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
|
||||
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
|
||||
if (usesAdditionalBuffer) {
|
||||
pos -= mTrieBuffer->getOriginalBufferSize();
|
||||
}
|
||||
// Read original flags
|
||||
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||
false /* isDeleted */, true /* willBecomeNonTerminal */);
|
||||
if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) {
|
||||
AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
// Update flags.
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||
return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
|
||||
&writingPos);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
// Update probability and historical information.
|
||||
// TODO: Update other information in the unigram property.
|
||||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
return false;
|
||||
}
|
||||
const ProbabilityEntry originalProbabilityEntry =
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
|
||||
unigramProperty);
|
||||
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
|
||||
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||
AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
|
||||
return false;
|
||||
}
|
||||
const ProbabilityEntry originalProbabilityEntry =
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
if (originalProbabilityEntry.hasHistoricalInfo()) {
|
||||
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
|
||||
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
|
||||
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
|
||||
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
|
||||
AKLOGE("Cannot write updated probability entry. terminalId: %d",
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
|
||||
if (!isValid) {
|
||||
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
|
||||
AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*outNeedsToKeepPtNode = isValid;
|
||||
} else {
|
||||
// No need to update probability.
|
||||
*outNeedsToKeepPtNode = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
|
||||
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||
return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||
newChildrenPosition, &childrenPosFieldPos);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int newTerminalId) {
|
||||
return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
|
||||
toBeUpdatedPtNodeParams->getTerminalIdFieldPos());
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||
return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
|
||||
ptNodeWritingPos);
|
||||
}
|
||||
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
|
||||
int *const ptNodeWritingPos) {
|
||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
|
||||
ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write probability.
|
||||
ProbabilityEntry newProbabilityEntry;
|
||||
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
|
||||
&newProbabilityEntry, unigramProperty);
|
||||
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
|
||||
&probabilityEntryToWrite);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
|
||||
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
|
||||
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
|
||||
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
|
||||
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
if (!sourcePtNodeParams->hasBigrams()) {
|
||||
// Update has bigrams flag.
|
||||
return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
|
||||
sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
|
||||
sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
|
||||
true /* hasBigrams */,
|
||||
sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
|
||||
targetPtNodeParam->getTerminalId());
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
|
||||
return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
||||
int *const outBigramEntryCount) {
|
||||
int parentPos = toBeUpdatedPtNodeParams->getParentPos();
|
||||
if (parentPos != NOT_A_DICT_POS) {
|
||||
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
|
||||
dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
|
||||
if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
|
||||
parentPos = it->second;
|
||||
}
|
||||
}
|
||||
int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
|
||||
+ DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||
// Write updated parent offset.
|
||||
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
|
||||
parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Updates children position.
|
||||
int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
|
||||
if (childrenPos != NOT_A_DICT_POS) {
|
||||
PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
|
||||
dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
|
||||
if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
|
||||
childrenPos = it->second;
|
||||
}
|
||||
}
|
||||
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Counts bigram entries.
|
||||
if (outBigramEntryCount) {
|
||||
*outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
|
||||
toBeUpdatedPtNodeParams->getTerminalId());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
|
||||
const int *const targetCodePoints, const int targetCodePointCount,
|
||||
const int shortcutProbability) {
|
||||
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
|
||||
targetCodePoints, targetCodePointCount, shortcutProbability)) {
|
||||
AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
|
||||
return false;
|
||||
}
|
||||
if (!ptNodeParams->hasShortcutTargets()) {
|
||||
// Update has shortcut targets flag.
|
||||
return updatePtNodeFlags(ptNodeParams->getHeadPos(),
|
||||
ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
|
||||
ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
|
||||
ptNodeParams->hasBigrams(),
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
|
||||
const PtNodeParams *const ptNodeParams) {
|
||||
const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
|
||||
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
|
||||
const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
|
||||
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
|
||||
return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
|
||||
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
|
||||
hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
||||
int *const ptNodeWritingPos) {
|
||||
const int nodePos = *ptNodeWritingPos;
|
||||
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||
// PtNode writing.
|
||||
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
|
||||
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Calculate a parent offset and write the offset.
|
||||
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
// Write code points
|
||||
if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (!ptNodeParams->willBecomeNonTerminal()) {
|
||||
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||
terminalId = ptNodeParams->getTerminalId();
|
||||
} else if (ptNodeParams->isTerminal()) {
|
||||
// Write terminal information using a new terminal id.
|
||||
// Get a new unused terminal id.
|
||||
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
|
||||
}
|
||||
}
|
||||
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||
if (isTerminal) {
|
||||
// Update the lookup table.
|
||||
if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||
terminalId, nodePos)) {
|
||||
return false;
|
||||
}
|
||||
// Write terminal Id.
|
||||
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
|
||||
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
if (outTerminalId) {
|
||||
*outTerminalId = terminalId;
|
||||
}
|
||||
}
|
||||
// Write children position
|
||||
if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||
return false;
|
||||
}
|
||||
return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
|
||||
isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
|
||||
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
|
||||
}
|
||||
|
||||
// Builds the probability entry that results from applying unigramProperty to
// originalProbabilityEntry. When the header policy reports that words carry historical info,
// the historical info is updated through ForgettingCurveUtils; otherwise only the probability
// is replaced.
// TODO: Consolidate historical info and probability.
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
        const ProbabilityEntry *const originalProbabilityEntry,
        const UnigramProperty *const unigramProperty) const {
    if (!mHeaderPolicy->hasHistoricalInfoOfWords()) {
        return originalProbabilityEntry->createEntryWithUpdatedProbability(
                unigramProperty->getProbability());
    }
    const HistoricalInfo infoFromProperty(unigramProperty->getTimestamp(),
            unigramProperty->getLevel(), unigramProperty->getCount());
    const HistoricalInfo mergedInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
            originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(),
            &infoFromProperty, mHeaderPolicy);
    return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(&mergedInfo);
}
|
||||
|
||||
bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
|
||||
const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
|
||||
const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
|
||||
// Create node flags and write them.
|
||||
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
|
||||
hasShortcutTargets, hasBigrams, hasMultipleChars,
|
||||
CHILDREN_POSITION_FIELD_SIZE);
|
||||
if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
|
||||
AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class BufferWithExtendableBuffer;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class HeaderPolicy;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
class Ver4BigramListPolicy;
|
||||
class Ver4DictBuffers;
|
||||
class Ver4PatriciaTrieNodeReader;
|
||||
class Ver4PtNodeArrayReader;
|
||||
class Ver4ShortcutListPolicy;
|
||||
|
||||
/*
|
||||
* This class is used for helping to writes nodes of ver4 patricia trie.
|
||||
*/
|
||||
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||
public:
|
||||
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
||||
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
|
||||
const PtNodeReader *const ptNodeReader,
|
||||
const PtNodeArrayReader *const ptNodeArrayReader,
|
||||
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
|
||||
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
|
||||
mShortcutPolicy(shortcutPolicy) {}
|
||||
|
||||
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
||||
|
||||
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||
|
||||
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int movedPos, const int bigramLinkedNodePos);
|
||||
|
||||
virtual bool markPtNodeAsWillBecomeNonTerminal(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||
|
||||
virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
|
||||
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
|
||||
|
||||
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int newChildrenPosition);
|
||||
|
||||
bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const int newTerminalId);
|
||||
|
||||
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
|
||||
|
||||
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
|
||||
bool *const outAddedNewBigram);
|
||||
|
||||
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||
const PtNodeParams *const targetPtNodeParam);
|
||||
|
||||
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
|
||||
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
|
||||
|
||||
virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||
const DictPositionRelocationMap *const dictPositionRelocationMap,
|
||||
int *const outBigramEntryCount);
|
||||
|
||||
virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
|
||||
const int *const targetCodePoints, const int targetCodePointCount,
|
||||
const int shortcutProbability);
|
||||
|
||||
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
||||
|
||||
bool writePtNodeAndGetTerminalIdAndAdvancePosition(
|
||||
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
|
||||
int *const ptNodeWritingPos);
|
||||
|
||||
// Create updated probability entry using given unigram property. In addition to the
|
||||
// probability, this method updates historical information if needed.
|
||||
// TODO: Update flags belonging to the unigram property.
|
||||
const ProbabilityEntry createUpdatedEntryFrom(
|
||||
const ProbabilityEntry *const originalProbabilityEntry,
|
||||
const UnigramProperty *const unigramProperty) const;
|
||||
|
||||
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
|
||||
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
|
||||
const bool hasMultipleChars);
|
||||
|
||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
|
||||
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||
Ver4DictBuffers *const mBuffers;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
DynamicPtReadingHelper mReadingHelper;
|
||||
Ver4BigramListPolicy *const mBigramPolicy;
|
||||
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H */
|
|
@ -0,0 +1,475 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
|
||||
* Do not edit this file other than updating policy's interface.
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
|
||||
// BinaryDictionaryDecayingTests.
|
||||
const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||
const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||
const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
|
||||
const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
|
||||
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
|
||||
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||
|
||||
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const {
|
||||
if (!dicNode->hasChildren()) {
|
||||
return;
|
||||
}
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
||||
while (!readingHelper.isEnd()) {
|
||||
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
|
||||
if (!ptNodeParams.isValid()) {
|
||||
break;
|
||||
}
|
||||
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
||||
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
|
||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
||||
// valid terminal DicNode.
|
||||
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
|
||||
}
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
|
||||
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
|
||||
ptNodeParams.hasChildren(),
|
||||
ptNodeParams.isBlacklisted()
|
||||
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
|
||||
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
||||
readingHelper.readNextSiblingNode(ptNodeParams);
|
||||
}
|
||||
if (readingHelper.isError()) {
|
||||
mIsCorrupted = true;
|
||||
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const {
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodePos(ptNodePos);
|
||||
const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
maxCodePointCount, outCodePoints, outUnigramProbability);
|
||||
if (readingHelper.isError()) {
|
||||
mIsCorrupted = true;
|
||||
AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
|
||||
}
|
||||
return codePointCount;
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const {
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
const int ptNodePos =
|
||||
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||
if (readingHelper.isError()) {
|
||||
mIsCorrupted = true;
|
||||
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
|
||||
}
|
||||
return ptNodePos;
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||
const int bigramProbability) const {
|
||||
if (mHeaderPolicy->isDecayingDict()) {
|
||||
// Both probabilities are encoded. Decode them and get probability.
|
||||
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
||||
} else {
|
||||
if (unigramProbability == NOT_A_PROBABILITY) {
|
||||
return NOT_A_PROBABILITY;
|
||||
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
||||
return ProbabilityUtils::backoff(unigramProbability);
|
||||
} else {
|
||||
// bigramProbability is a bigram probability delta.
|
||||
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
||||
bigramProbability);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||
if (ptNodeParams.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
|
||||
ptNodeParams.getTerminalId());
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
||||
if (ptNodeParams.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return mBuffers->getBigramDictContent()->getBigramListHeadPos(
|
||||
ptNodeParams.getTerminalId());
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||
mDictBuffer->getTailPosition());
|
||||
return false;
|
||||
}
|
||||
if (length > MAX_WORD_LENGTH) {
|
||||
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
|
||||
return false;
|
||||
}
|
||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||
AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
|
||||
shortcut.getTargetCodePoints()->size());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
bool addedNewUnigram = false;
|
||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
|
||||
unigramProperty, &addedNewUnigram)) {
|
||||
if (addedNewUnigram) {
|
||||
mUnigramCount++;
|
||||
}
|
||||
if (unigramProperty->getShortcuts().size() > 0) {
|
||||
// Add shortcut target.
|
||||
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (wordPos == NOT_A_DICT_POS) {
|
||||
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
|
||||
return false;
|
||||
}
|
||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
||||
shortcut.getTargetCodePoints()->data(),
|
||||
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
|
||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
|
||||
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
||||
shortcut.getProbability());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||
const BigramProperty *const bigramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||
mDictBuffer->getTailPosition());
|
||||
return false;
|
||||
}
|
||||
if (length0 > MAX_WORD_LENGTH
|
||||
|| bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
|
||||
"length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size());
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(
|
||||
bigramProperty->getTargetCodePoints()->data(),
|
||||
bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
bool addedNewBigram = false;
|
||||
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
|
||||
if (addedNewBigram) {
|
||||
mBigramCount++;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
|
||||
const int *const word1, const int length1) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
|
||||
mDictBuffer->getTailPosition());
|
||||
return false;
|
||||
}
|
||||
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
|
||||
AKLOGE("Either src word or target word is too long to remove the bigram to from the "
|
||||
"dictionary. length0: %d, length1: %d", length0, length1);
|
||||
return false;
|
||||
}
|
||||
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word0Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (word1Pos == NOT_A_DICT_POS) {
|
||||
return false;
|
||||
}
|
||||
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||
mBigramCount--;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||
return;
|
||||
}
|
||||
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
|
||||
AKLOGE("Cannot flush the dictionary to file.");
|
||||
mIsCorrupted = true;
|
||||
}
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
|
||||
AKLOGE("Cannot flush the dictionary to file with GC.");
|
||||
mIsCorrupted = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
}
|
||||
if (mBuffers->isNearSizeLimit()) {
|
||||
// Additional buffer size is near the limit.
|
||||
return true;
|
||||
} else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
|
||||
> Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
|
||||
// Total extended region size of the trie exceeds the limit.
|
||||
return true;
|
||||
} else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
|
||||
&& mDictBuffer->getUsedAdditionalBufferSize() > 0) {
|
||||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy->isDecayingDict()) {
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
|
||||
mHeaderPolicy);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
|
||||
char *const outResult, const int maxResultLength) {
|
||||
const int compareLength = queryLength + 1 /* terminator */;
|
||||
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
|
||||
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d", mBigramCount);
|
||||
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getUnigramCountHardLimit(
|
||||
mHeaderPolicy->getMaxUnigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
|
||||
snprintf(outResult, maxResultLength, "%d",
|
||||
mHeaderPolicy->isDecayingDict() ?
|
||||
ForgettingCurveUtils::getBigramCountHardLimit(
|
||||
mHeaderPolicy->getMaxBigramCount()) :
|
||||
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
}
|
||||
}
|
||||
|
||||
// Collects everything stored for a word into a WordProperty: its code points, its unigram
// attributes, its bigram list and its shortcut list.
//
// codePoints / codePointCount: the word to look up (exact-case search).
//
// Returns a default-constructed WordProperty, after logging an error, when the word has no
// terminal PtNode in this dictionary.
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
        const int codePointCount) const {
    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        AKLOGE("getWordProperty is called for invalid word.");
        return WordProperty();
    }
    const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
    std::vector<int> wordCodePoints(ptNodeParams.getCodePoints(),
            ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
    const ProbabilityEntry unigramEntry =
            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
                    ptNodeParams.getTerminalId());
    const HistoricalInfo *const unigramHistoricalInfo = unigramEntry.getHistoricalInfo();

    // Fetch bigram information.
    std::vector<BigramProperty> bigramProperties;
    const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    if (bigramListPos != NOT_A_DICT_POS) {
        int word1Buffer[MAX_WORD_LENGTH];
        const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
        const TerminalPositionLookupTable *const terminalPositionLookupTable =
                mBuffers->getTerminalPositionLookupTable();
        int readingPos = bigramListPos;
        bool hasNext = true;
        do {
            const BigramEntry bigramEntry =
                    bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
            hasNext = bigramEntry.hasNext();
            const int word1TerminalPtNodePos =
                    terminalPositionLookupTable->getTerminalPtNodePosition(
                            bigramEntry.getTargetTerminalId());
            if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
                // The target has no terminal PtNode position; skip this entry.
                continue;
            }
            // Word (unigram) probability
            int word1Probability = NOT_A_PROBABILITY;
            const int word1Length = getCodePointsAndProbabilityAndReturnCodePointCount(
                    word1TerminalPtNodePos, MAX_WORD_LENGTH, word1Buffer, &word1Probability);
            const std::vector<int> word1(word1Buffer, word1Buffer + word1Length);
            const HistoricalInfo *const bigramHistoricalInfo = bigramEntry.getHistoricalInfo();
            int bigramProbability;
            if (bigramEntry.hasHistoricalInfo()) {
                bigramProbability = ForgettingCurveUtils::decodeProbability(
                        bigramHistoricalInfo, mHeaderPolicy);
            } else {
                bigramProbability = getProbability(word1Probability,
                        bigramEntry.getProbability());
            }
            bigramProperties.emplace_back(&word1, bigramProbability,
                    bigramHistoricalInfo->getTimeStamp(), bigramHistoricalInfo->getLevel(),
                    bigramHistoricalInfo->getCount());
        } while (hasNext);
    }

    // Fetch shortcut information.
    std::vector<UnigramProperty::ShortcutProperty> shortcutProperties;
    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    if (shortcutPos != NOT_A_DICT_POS) {
        int targetBuffer[MAX_WORD_LENGTH];
        const ShortcutDictContent *const shortcutDictContent =
                mBuffers->getShortcutDictContent();
        bool hasNext = true;
        do {
            int targetLength = 0;
            int shortcutProbability = NOT_A_PROBABILITY;
            shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetBuffer,
                    &targetLength, &shortcutProbability, &hasNext, &shortcutPos);
            const std::vector<int> target(targetBuffer, targetBuffer + targetLength);
            shortcutProperties.emplace_back(&target, shortcutProbability);
        } while (hasNext);
    }

    const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
            ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
            unigramHistoricalInfo->getTimeStamp(), unigramHistoricalInfo->getLevel(),
            unigramHistoricalInfo->getCount(), &shortcutProperties);
    return WordProperty(&wordCodePoints, &unigramProperty, &bigramProperties);
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
|
||||
// TODO: Return code point count like other methods.
|
||||
// Null termination.
|
||||
outCodePoints[0] = 0;
|
||||
if (token == 0) {
|
||||
mTerminalPtNodePositionsForIteratingWords.clear();
|
||||
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
|
||||
&mTerminalPtNodePositionsForIteratingWords);
|
||||
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
|
||||
}
|
||||
const int terminalPtNodePositionsVectorSize =
|
||||
static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
|
||||
if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
|
||||
AKLOGE("Given token %d is invalid.", token);
|
||||
return 0;
|
||||
}
|
||||
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
|
||||
int unigramProbability = NOT_A_PROBABILITY;
|
||||
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
|
||||
if (codePointCount < MAX_WORD_LENGTH) {
|
||||
// Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
|
||||
// code points.
|
||||
outCodePoints[codePointCount] = 0;
|
||||
}
|
||||
const int nextToken = token + 1;
|
||||
if (nextToken >= terminalPtNodePositionsVectorSize) {
|
||||
// All words have been iterated.
|
||||
mTerminalPtNodePositionsForIteratingWords.clear();
|
||||
return 0;
|
||||
}
|
||||
return nextToken;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
|
||||
* Do not edit this file other than updating policy's interface.
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class DicNode;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class DicNodeVector;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||
public:
|
||||
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
|
||||
: mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
|
||||
mDictBuffer(mBuffers->getWritableTrieBuffer()),
|
||||
mBigramPolicy(mBuffers->getMutableBigramDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
|
||||
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable()),
|
||||
mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
|
||||
mPtNodeArrayReader(mDictBuffer),
|
||||
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
|
||||
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy->getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy->getBigramCount()),
|
||||
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
|
||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||
int *const outUnigramProbability) const;
|
||||
|
||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const;
|
||||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
||||
return mHeaderPolicy;
|
||||
}
|
||||
|
||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
||||
return &mBigramPolicy;
|
||||
}
|
||||
|
||||
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
||||
return &mShortcutPolicy;
|
||||
}
|
||||
|
||||
bool addUnigramWord(const int *const word, const int length,
|
||||
const UnigramProperty *const unigramProperty);
|
||||
|
||||
bool addBigramWords(const int *const word0, const int length0,
|
||||
const BigramProperty *const bigramProperty);
|
||||
|
||||
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
||||
const int length1);
|
||||
|
||||
void flush(const char *const filePath);
|
||||
|
||||
void flushWithGC(const char *const filePath);
|
||||
|
||||
bool needsToRunGC(const bool mindsBlockByGC) const;
|
||||
|
||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||
const int maxResultLength);
|
||||
|
||||
const WordProperty getWordProperty(const int *const codePoints,
|
||||
const int codePointCount) const;
|
||||
|
||||
int getNextWordAndNextToken(const int token, int *const outCodePoints);
|
||||
|
||||
bool isCorrupted() const {
|
||||
return mIsCorrupted;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
||||
|
||||
static const char *const UNIGRAM_COUNT_QUERY;
|
||||
static const char *const BIGRAM_COUNT_QUERY;
|
||||
static const char *const MAX_UNIGRAM_COUNT_QUERY;
|
||||
static const char *const MAX_BIGRAM_COUNT_QUERY;
|
||||
// When the dictionary size is near the maximum size, we have to refuse dynamic operations to
|
||||
// prevent the dictionary from overflowing.
|
||||
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||
|
||||
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||
const HeaderPolicy *const mHeaderPolicy;
|
||||
BufferWithExtendableBuffer *const mDictBuffer;
|
||||
Ver4BigramListPolicy mBigramPolicy;
|
||||
Ver4ShortcutListPolicy mShortcutPolicy;
|
||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||
Ver4PtNodeArrayReader mPtNodeArrayReader;
|
||||
Ver4PatriciaTrieNodeWriter mNodeWriter;
|
||||
DynamicPtUpdatingHelper mUpdatingHelper;
|
||||
Ver4PatriciaTrieWritingHelper mWritingHelper;
|
||||
int mUnigramCount;
|
||||
int mBigramCount;
|
||||
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
|
||||
mutable bool mIsCorrupted;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
|
||||
const uint8_t *const buffer, int *pos) {
|
||||
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class BufferWithExtendableBuffer;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class Ver4PatriciaTrieReadingUtils {
|
||||
public:
|
||||
static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
|
||||
int *const pos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H */
|
|
@ -0,0 +1,301 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <queue>
|
||||
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
|
||||
const int unigramCount, const int bigramCount) const {
|
||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
|
||||
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
|
||||
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
|
||||
unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
|
||||
AKLOGE("Cannot write header structure to buffer. "
|
||||
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
|
||||
"extendedRegionSize: %d", false, unigramCount, bigramCount,
|
||||
extendedRegionSize);
|
||||
return false;
|
||||
}
|
||||
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
|
||||
const char *const dictDirPath) {
|
||||
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
|
||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
|
||||
Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
|
||||
Ver4DictConstants::MAX_DICTIONARY_SIZE));
|
||||
int unigramCount = 0;
|
||||
int bigramCount = 0;
|
||||
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
|
||||
return false;
|
||||
}
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
|
||||
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
|
||||
return false;
|
||||
}
|
||||
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
|
||||
int *const outUnigramCount, int *const outBigramCount) {
|
||||
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
|
||||
mBuffers->getProbabilityDictContent(), headerPolicy);
|
||||
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
|
||||
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
|
||||
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
|
||||
mBuffers->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
|
||||
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||
&shortcutPolicy);
|
||||
|
||||
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPtGcEventListeners
|
||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||
&ptNodeWriter);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||
return false;
|
||||
}
|
||||
const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||
.getValidUnigramCount();
|
||||
const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
|
||||
if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
|
||||
if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
|
||||
AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
|
||||
maxUnigramCount);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
|
||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateBigramProbability)) {
|
||||
return false;
|
||||
}
|
||||
const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
|
||||
const int maxBigramCount = headerPolicy->getMaxBigramCount();
|
||||
if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
|
||||
if (!truncateBigrams(maxBigramCount)) {
|
||||
AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
|
||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
|
||||
&shortcutPolicy);
|
||||
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
|
||||
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
|
||||
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
|
||||
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create policy instances for the GCed dictionary.
|
||||
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
|
||||
buffersToWrite->getProbabilityDictContent(), headerPolicy);
|
||||
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
|
||||
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
|
||||
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
|
||||
buffersToWrite->getTerminalPositionLookupTable());
|
||||
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
|
||||
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
|
||||
&newShortcutPolicy);
|
||||
// Re-assign terminal IDs for valid terminal PtNodes.
|
||||
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
|
||||
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
|
||||
&terminalIdMap)) {
|
||||
return false;
|
||||
}
|
||||
// Run GC for probability dict content.
|
||||
if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
|
||||
mBuffers->getProbabilityDictContent())) {
|
||||
return false;
|
||||
}
|
||||
// Run GC for bigram dict content.
|
||||
if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
|
||||
mBuffers->getBigramDictContent(), outBigramCount)) {
|
||||
return false;
|
||||
}
|
||||
// Run GC for shortcut dict content.
|
||||
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
|
||||
mBuffers->getShortcutDictContent())) {
|
||||
return false;
|
||||
}
|
||||
DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
|
||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
|
||||
traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
|
||||
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
||||
&traversePolicyToUpdateAllPositionFields)) {
|
||||
return false;
|
||||
}
|
||||
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||
TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
||||
traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
|
||||
if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||
&traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
|
||||
return false;
|
||||
}
|
||||
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
|
||||
const Ver4PatriciaTrieNodeReader *const ptNodeReader,
|
||||
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
|
||||
const TerminalPositionLookupTable *const terminalPosLookupTable =
|
||||
mBuffers->getTerminalPositionLookupTable();
|
||||
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
|
||||
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
|
||||
priorityQueue;
|
||||
for (int i = 0; i < nextTerminalId; ++i) {
|
||||
const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
|
||||
if (terminalPos == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
const ProbabilityEntry probabilityEntry =
|
||||
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
|
||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
probabilityEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(terminalPos, probability,
|
||||
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
}
|
||||
|
||||
// Delete unigrams.
|
||||
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
|
||||
const int ptNodePos = priorityQueue.top().getDictPos();
|
||||
const PtNodeParams ptNodeParams =
|
||||
ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
|
||||
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
|
||||
return false;
|
||||
}
|
||||
priorityQueue.pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
|
||||
const TerminalPositionLookupTable *const terminalPosLookupTable =
|
||||
mBuffers->getTerminalPositionLookupTable();
|
||||
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
|
||||
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
|
||||
priorityQueue;
|
||||
BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
|
||||
for (int i = 0; i < nextTerminalId; ++i) {
|
||||
const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
|
||||
if (bigramListPos == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
bool hasNext = true;
|
||||
int readingPos = bigramListPos;
|
||||
while (hasNext) {
|
||||
const int entryPos = readingPos;
|
||||
const BigramEntry bigramEntry =
|
||||
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
|
||||
hasNext = bigramEntry.hasNext();
|
||||
if (!bigramEntry.isValid()) {
|
||||
continue;
|
||||
}
|
||||
const int probability = bigramEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(
|
||||
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
|
||||
bigramEntry.getProbability();
|
||||
priorityQueue.push(DictProbability(entryPos, probability,
|
||||
bigramEntry.getHistoricalInfo()->getTimeStamp()));
|
||||
}
|
||||
}
|
||||
|
||||
// Delete bigrams.
|
||||
while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
|
||||
const int entryPos = priorityQueue.top().getDictPos();
|
||||
const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
|
||||
const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
|
||||
if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
|
||||
AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
|
||||
return false;
|
||||
}
|
||||
priorityQueue.pop();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
||||
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
|
||||
if (!ptNodeParams->isTerminal()) {
|
||||
return true;
|
||||
}
|
||||
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
|
||||
mTerminalIdMap->find(ptNodeParams->getTerminalId());
|
||||
if (it == mTerminalIdMap->end()) {
|
||||
AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
|
||||
ptNodeParams->getTerminalId(), mTerminalIdMap->size());
|
||||
return false;
|
||||
}
|
||||
if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
|
||||
AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
|
||||
}
|
||||
return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class HeaderPolicy;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
class Ver4DictBuffers;
|
||||
class Ver4PatriciaTrieNodeReader;
|
||||
class Ver4PatriciaTrieNodeWriter;
|
||||
|
||||
class Ver4PatriciaTrieWritingHelper {
|
||||
public:
|
||||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||
: mBuffers(buffers) {}
|
||||
|
||||
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
|
||||
const int bigramCount) const;
|
||||
|
||||
// This method cannot be const because the original dictionary buffer will be updated to detect
|
||||
// useless PtNodes during GC.
|
||||
bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
||||
|
||||
class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
|
||||
: public DynamicPtReadingHelper::TraversingEventListener {
|
||||
public:
|
||||
TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
|
||||
Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
|
||||
: mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
|
||||
|
||||
bool onAscend() { return true; }
|
||||
|
||||
bool onDescend(const int ptNodeArrayPos) { return true; }
|
||||
|
||||
bool onReadingPtNodeArrayTail() { return true; }
|
||||
|
||||
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
|
||||
|
||||
Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
|
||||
const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
|
||||
};
|
||||
|
||||
// For truncateUnigrams() and truncateBigrams().
|
||||
class DictProbability {
|
||||
public:
|
||||
DictProbability(const int dictPos, const int probability, const int timestamp)
|
||||
: mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
|
||||
|
||||
int getDictPos() const {
|
||||
return mDictPos;
|
||||
}
|
||||
|
||||
int getProbability() const {
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
int getTimestamp() const {
|
||||
return mTimestamp;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
|
||||
|
||||
int mDictPos;
|
||||
int mProbability;
|
||||
int mTimestamp;
|
||||
};
|
||||
|
||||
// For truncateUnigrams() and truncateBigrams().
|
||||
class DictProbabilityComparator {
|
||||
public:
|
||||
bool operator()(const DictProbability &left, const DictProbability &right) {
|
||||
if (left.getProbability() != right.getProbability()) {
|
||||
return left.getProbability() > right.getProbability();
|
||||
}
|
||||
if (left.getTimestamp() != right.getTimestamp()) {
|
||||
return left.getTimestamp() < right.getTimestamp();
|
||||
}
|
||||
return left.getDictPos() > right.getDictPos();
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
|
||||
};
|
||||
|
||||
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
|
||||
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
|
||||
int *const outBigramCount);
|
||||
|
||||
bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
|
||||
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
|
||||
|
||||
bool truncateBigrams(const int maxBigramCount);
|
||||
|
||||
Ver4DictBuffers *const mBuffers;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
|
||||
if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) {
|
||||
// Reading invalid position because of a bug or a broken dictionary.
|
||||
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
|
||||
ptNodeArrayPos, mBuffer->getTailPosition());
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
int readingPos = ptNodeArrayPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
readingPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||
dictBuf, &readingPos);
|
||||
if (usesAdditionalBuffer) {
|
||||
readingPos += mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
if (ptNodeCountInArray < 0) {
|
||||
AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray);
|
||||
return false;
|
||||
}
|
||||
*outPtNodeCount = ptNodeCountInArray;
|
||||
*outFirstPtNodePos = readingPos;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||
int *const outNextPtNodeArrayPos) const {
|
||||
if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) {
|
||||
// Reading invalid position because of bug or broken dictionary.
|
||||
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
|
||||
forwordLinkPos, mBuffer->getTailPosition());
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
|
||||
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
|
||||
int readingPos = forwordLinkPos;
|
||||
if (usesAdditionalBuffer) {
|
||||
readingPos -= mBuffer->getOriginalBufferSize();
|
||||
}
|
||||
const int nextPtNodeArrayOffset =
|
||||
DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos);
|
||||
if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) {
|
||||
*outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset;
|
||||
} else {
|
||||
*outNextPtNodeArrayPos = NOT_A_DICT_POS;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* !!!!! DO NOT EDIT THIS FILE !!!!!
|
||||
*
|
||||
* This file was generated from
|
||||
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
|
||||
#define LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
|
||||
|
||||
namespace latinime {
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
class BufferWithExtendableBuffer;
|
||||
namespace backward {
|
||||
namespace v401 {
|
||||
|
||||
class Ver4PtNodeArrayReader : public PtNodeArrayReader {
|
||||
public:
|
||||
Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {};
|
||||
|
||||
virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||
int *const outPtNodeCount, int *const outFirstPtNodePos) const;
|
||||
virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||
int *const outNextPtNodeArrayPos) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
|
||||
|
||||
const BufferWithExtendableBuffer *const mBuffer;
|
||||
};
|
||||
} // namespace v401
|
||||
} // namespace backward
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H */
|
Loading…
Reference in a new issue