Generate dict code for version 401.

Bug: 13406708
Change-Id: I769ac84d54d997e7aefab0c9c16727455a132e0b
main
Keisuke Kuroyanagi 2014-05-14 19:09:01 +09:00
parent 0c8ce96bec
commit 6bf268132d
36 changed files with 4660 additions and 0 deletions

View File

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
LATIN_IME_CORE_SRC_FILES := LATIN_IME_CORE_SRC_FILES :=
LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 :=
LATIN_IME_CORE_TEST_FILES := LATIN_IME_CORE_TEST_FILES :=
LATIN_IME_JNI_SRC_FILES := LATIN_IME_JNI_SRC_FILES :=
LATIN_IME_SRC_DIR := LATIN_IME_SRC_DIR :=

View File

@ -98,6 +98,27 @@ LATIN_IME_CORE_SRC_FILES := \
log_utils.cpp \ log_utils.cpp \
time_keeper.cpp) time_keeper.cpp)
LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 := \
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/, \
ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
ver4_patricia_trie_policy.cpp \
ver4_patricia_trie_reading_utils.cpp \
ver4_patricia_trie_writing_helper.cpp \
ver4_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/content/, \
bigram_dict_content.cpp \
probability_dict_content.cpp \
shortcut_dict_content.cpp \
sparse_table_dict_content.cpp \
terminal_position_lookup_table.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/backward/v401/bigram/, \
ver4_bigram_list_policy.cpp)
LATIN_IME_CORE_SRC_FILES += $(LATIN_IME_CORE_SRC_FILES_BACKWARD_V401)
LATIN_IME_CORE_TEST_FILES := \ LATIN_IME_CORE_TEST_FILES := \
defines_test.cpp \ defines_test.cpp \
suggest/core/layout/normal_distribution_2d_test.cpp \ suggest/core/layout/normal_distribution_2d_test.cpp \

View File

@ -0,0 +1 @@
Files under this directory have been auto generated.

View File

@ -0,0 +1,290 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
if (outBigramPos) {
// Lookup target PtNode position.
*outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
}
if (outProbability) {
if (bigramEntry.hasHistoricalInfo()) {
*outProbability =
ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
mHeaderPolicy);
} else {
*outProbability = bigramEntry.getProbability();
}
}
if (outHasNext) {
*outHasNext = bigramEntry.hasNext();
}
}
bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
// 1. The word has no bigrams yet.
// 2. The word has bigrams, and there is the target in the list.
// 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
// 4. The word has bigrams. We have to append new bigram entry to the list.
// 5. Same as 4, but the list is the last entry of the content file.
if (outAddedNewEntry) {
*outAddedNewEntry = false;
}
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Case 1. PtNode that doesn't have a bigram list.
// Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
bigramProperty);
// Write an entry.
const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
}
int tailEntryPos = NOT_A_DICT_POS;
const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
&tailEntryPos);
if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
// Case 4, 5.
// Add new entry to the bigram list.
if (tailEntryPos == NOT_A_DICT_POS) {
// Case 4. Create new bigram list.
if (!mBigramDictContent->createNewBigramList(terminalId)) {
return false;
}
const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
// Copy existing bigram list.
if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
return false;
}
}
// Write new entry at the tail position of the bigram content.
const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&newBigramEntry, bigramProperty);
if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
return false;
}
// Update has next flag of the tail entry.
if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
return false;
}
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
return true;
}
// Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (!originalBigramEntry.isValid()) {
// Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
// entry is updated.
if (outAddedNewEntry) {
*outAddedNewEntry = true;
}
}
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
&updatedBigramEntry, bigramProperty);
return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
}
bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return false;
}
const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
nullptr /* outTailEntryPos */);
if (entryPosToUpdate == NOT_A_DICT_POS) {
// Bigram entry doesn't exist.
return false;
}
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
// Bigram entry doesn't exist.
return false;
}
// Remove bigram entry by marking it as invalid entry and overwriting the original entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
}
bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
int *const outBigramCount) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return true;
}
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!bigramEntry.isValid()) {
continue;
}
const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
bigramEntry.getTargetTerminalId());
if (targetPtNodePos == NOT_A_DICT_POS) {
// Invalidate bigram entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
} else if (bigramEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
bigramEntry.getHistoricalInfo(), mHeaderPolicy);
if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
const BigramEntry updatedBigramEntry =
bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
*outBigramCount += 1;
} else {
// Remove entry.
const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
return false;
}
}
} else {
*outBigramCount += 1;
}
}
return true;
}
int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
if (bigramListPos == NOT_A_DICT_POS) {
// Bigram list doesn't exist.
return 0;
}
int bigramCount = 0;
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.isValid()) {
bigramCount++;
}
}
return bigramCount;
}
int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
const int bigramListPos, int *const outTailEntryPos) const {
if (outTailEntryPos) {
*outTailEntryPos = NOT_A_DICT_POS;
}
bool hasNext = true;
int invalidEntryPos = NOT_A_DICT_POS;
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
const BigramEntry bigramEntry =
mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
// Entry with same target is found.
return entryPos;
} else if (!bigramEntry.isValid()) {
// Invalid entry that can be reused is found.
invalidEntryPos = entryPos;
}
if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
if (outTailEntryPos) {
*outTailEntryPos = entryPos;
}
}
}
return invalidEntryPos;
}
const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
const BigramEntry *const originalBigramEntry,
const BigramProperty *const bigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
bigramProperty->getLevel(), bigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
&historicalInfoForUpdate, mHeaderPolicy);
return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
} else {
return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
}
}
bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
#define LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
namespace latinime {
namespace backward {
namespace v401 {
class BigramDictContent;
} // namespace v401
} // namespace backward
class BigramProperty;
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class HeaderPolicy;
namespace backward {
namespace v401 {
class TerminalPositionLookupTable;
class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
Ver4BigramListPolicy(BigramDictContent *const bigramDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable,
const HeaderPolicy *const headerPolicy)
: mBigramDictContent(bigramDictContent),
mTerminalPositionLookupTable(terminalPositionLookupTable),
mHeaderPolicy(headerPolicy) {}
void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const;
void skipAllBigrams(int *const pos) const {
// Do nothing because we don't need to skip bigram lists in ver4 dictionaries.
}
bool addNewEntry(const int terminalId, const int newTargetTerminalId,
const BigramProperty *const bigramProperty, bool *const outAddedNewEntry);
bool removeEntry(const int terminalId, const int targetTerminalId);
bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
int *const outBigramCount);
int getBigramEntryConut(const int terminalId);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy);
int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos,
int *const outTailEntryPos) const;
const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry,
const BigramProperty *const bigramProperty) const;
bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos);
BigramDictContent *const mBigramDictContent;
const TerminalPositionLookupTable *const mTerminalPositionLookupTable;
const HeaderPolicy *const mHeaderPolicy;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H */

View File

@ -0,0 +1,224 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
int *const bigramEntryPos) const {
const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) {
AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d",
*bigramEntryPos, bigramListBuffer->getTailPosition());
ASSERT(false);
return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
Ver4DictConstants::NOT_A_TERMINAL_ID);
}
const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0;
int probability = NOT_A_PROBABILITY;
int timestamp = NOT_A_TIMESTAMP;
int level = 0;
int count = 0;
if (mHasHistoricalInfo) {
probability = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
timestamp = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
level = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
count = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
} else {
probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK;
}
const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
const int targetTerminalId =
(encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
if (mHasHistoricalInfo) {
const HistoricalInfo historicalInfo(timestamp, level, count);
return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId);
} else {
return BigramEntry(hasNext, probability, targetTerminalId);
}
}
bool BigramDictContent::writeBigramEntryAndAdvancePosition(
const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
const int bigramFlags = createAndGetBigramFlags(
mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(),
bigramEntryToWrite->hasNext());
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false;
}
if (mHasHistoricalInfo) {
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
bigramEntryToWrite->getProbability());
return false;
}
const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo();
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
historicalInfo->getTimeStamp());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
historicalInfo->getLevel());
return false;
}
if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
historicalInfo->getCount());
return false;
}
}
const int targetTerminalIdToWrite =
(bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID :
bigramEntryToWrite->getTargetTerminalId();
if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
*entryWritingPos, bigramEntryToWrite->getTargetTerminalId());
return false;
}
return true;
}
bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos,
int *const outTailEntryPos) {
int readingPos = bigramListPos;
int writingPos = toPos;
bool hasNext = true;
while (hasNext) {
const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!hasNext) {
*outTailEntryPos = writingPos;
}
if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false;
}
}
return true;
}
bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount) {
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
it != terminalIdMap->end(); ++it) {
const int originalBigramListPos =
originalBigramDictContent->getBigramListHeadPos(it->first);
if (originalBigramListPos == NOT_A_DICT_POS) {
// This terminal does not have a bigram list.
continue;
}
const int bigramListPos = getContentBuffer()->getTailPosition();
int bigramEntryCount = 0;
// Copy bigram list with GC from original content.
if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
terminalIdMap, &bigramEntryCount)) {
AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
originalBigramListPos, bigramListPos);
return false;
}
if (bigramEntryCount == 0) {
// All bigram entries are useless. This terminal does not have a bigram list.
continue;
}
*outBigramEntryCount += bigramEntryCount;
// Set bigram list position to the lookup table.
if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
it->second, bigramListPos);
return false;
}
}
return true;
}
// Returns whether GC for the bigram list was succeeded or not.
bool BigramDictContent::runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
int *const outEntrycount) {
bool hasNext = true;
int readingPos = bigramListPos;
int writingPos = toPos;
int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) {
const BigramEntry originalBigramEntry =
sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = originalBigramEntry.hasNext();
if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) {
continue;
}
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
if (it == terminalIdMap->end()) {
// Target word has been removed.
continue;
}
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
const BigramEntry updatedBigramEntry =
originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false;
}
*outEntrycount += 1;
}
if (lastEntryPos != NOT_A_DICT_POS) {
// Update has next flag in the last written entry.
const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry(
false /* hasNext */);
if (!writeBigramEntry(&bigramEntry, lastEntryPos)) {
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
return false;
}
}
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,122 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
namespace latinime {
namespace backward {
namespace v401 {
class BigramDictContent : public SparseTableDictContent {
public:
BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo,
const bool isUpdatable)
: SparseTableDictContent(dictPath,
Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
BigramDictContent(const bool hasHistoricalInfo)
: SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
mHasHistoricalInfo(hasHistoricalInfo) {}
const BigramEntry getBigramEntry(const int bigramEntryPos) const {
int readingPos = bigramEntryPos;
return getBigramEntryAndAdvancePosition(&readingPos);
}
const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
// Returns head position of bigram list for a PtNode specified by terminalId.
int getBigramListHeadPos(const int terminalId) const {
const SparseTable *const addressLookupTable = getAddressLookupTable();
if (!addressLookupTable->contains(terminalId)) {
return NOT_A_DICT_POS;
}
return addressLookupTable->get(terminalId);
}
bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
int writingPos = getContentBuffer()->getTailPosition();
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
}
bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
int writingPos = entryWritingPos;
return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
}
bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
int *const entryWritingPos);
bool createNewBigramList(const int terminalId) {
const int bigramListPos = getContentBuffer()->getTailPosition();
return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
}
bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos);
bool flushToFile(const char *const dictPath) const {
return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::BIGRAM_FILE_EXTENSION);
}
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const BigramDictContent *const originalBigramDictContent,
int *const outBigramEntryCount);
bool isContentTailPos(const int pos) const {
return pos == getContentBuffer()->getTailPosition();
}
private:
DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
int createAndGetBigramFlags(const int probability, const bool hasNext) const {
return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK)
| (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0);
}
bool runGCBigramList(const int bigramListPos,
const BigramDictContent *const sourceBigramDictContent, const int toPos,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
int *const outEntryCount);
bool mHasHistoricalInfo;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H */

View File

@ -0,0 +1,110 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h
*/
#ifndef LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
#define LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
namespace v401 {
class BigramEntry {
public:
BigramEntry(const BigramEntry& bigramEntry)
: mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability),
mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(),
mTargetTerminalId(targetTerminalId) {}
// Entry with historical information.
BigramEntry(const bool hasNext, const int probability,
const HistoricalInfo *const historicalInfo, const int targetTerminalId)
: mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo),
mTargetTerminalId(targetTerminalId) {}
const BigramEntry getInvalidatedEntry() const {
return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID);
}
const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const {
return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId);
}
const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const {
return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId);
}
const BigramEntry updateProbabilityAndGetEntry(const int probability) const {
return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId);
}
const BigramEntry updateHistoricalInfoAndGetEntry(
const HistoricalInfo *const historicalInfo) const {
return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId);
}
bool isValid() const {
return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
}
bool hasNext() const {
return mHasNext;
}
int getProbability() const {
return mProbability;
}
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
const HistoricalInfo *getHistoricalInfo() const {
return &mHistoricalInfo;
}
int getTargetTerminalId() const {
return mTargetTerminalId;
}
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry);
DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry);
const bool mHasNext;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
const int mTargetTerminalId;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H */

View File

@ -0,0 +1,47 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_DICT_CONTENT_H
#include "defines.h"
namespace latinime {
namespace backward {
namespace v401 {
class DictContent {
public:
virtual ~DictContent() {}
virtual bool isValid() const = 0;
protected:
DictContent() {}
private:
DISALLOW_COPY_AND_ASSIGN(DictContent);
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_DICT_CONTENT_H */

View File

@ -0,0 +1,171 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
// This method can be called with invalid terminal id during GC.
return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY);
}
const BufferWithExtendableBuffer *const buffer = getBuffer();
int entryPos = getEntryPos(terminalId);
const int flags = buffer->readUintAndAdvancePosition(
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos);
const int probability = buffer->readUintAndAdvancePosition(
Ver4DictConstants::PROBABILITY_SIZE, &entryPos);
if (mHasHistoricalInfo) {
const int timestamp = buffer->readUintAndAdvancePosition(
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos);
const int level = buffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos);
const int count = buffer->readUintAndAdvancePosition(
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos);
const HistoricalInfo historicalInfo(timestamp, level, count);
return ProbabilityEntry(flags, probability, &historicalInfo);
} else {
return ProbabilityEntry(flags, probability);
}
}
bool ProbabilityDictContent::setProbabilityEntry(const int terminalId,
const ProbabilityEntry *const probabilityEntry) {
if (terminalId < 0) {
return false;
}
const int entryPos = getEntryPos(terminalId);
if (terminalId >= mSize) {
ProbabilityEntry dummyEntry;
// Write new entry.
int writingPos = getBuffer()->getTailPosition();
while (writingPos <= entryPos) {
// Fulfilling with dummy entries until writingPos.
if (!writeEntry(&dummyEntry, writingPos)) {
AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize);
return false;
}
writingPos += getEntrySize();
mSize++;
}
}
return writeEntry(probabilityEntry, entryPos);
}
bool ProbabilityDictContent::flushToFile(const char *const dictPath) const {
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo);
for (int i = 0; i < mSize; ++i) {
const ProbabilityEntry probabilityEntry = getProbabilityEntry(i);
if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) {
AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i);
return false;
}
}
return probabilityDictContentToWrite.flush(dictPath,
Ver4DictConstants::FREQ_FILE_EXTENSION);
} else {
return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION);
}
}
bool ProbabilityDictContent::runGC(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ProbabilityDictContent *const originalProbabilityDictContent) {
mSize = 0;
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
it != terminalIdMap->end(); ++it) {
const ProbabilityEntry probabilityEntry =
originalProbabilityDictContent->getProbabilityEntry(it->first);
if (!setProbabilityEntry(it->second, &probabilityEntry)) {
AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second);
return false;
}
mSize++;
}
return true;
}
int ProbabilityDictContent::getEntrySize() const {
if (mHasHistoricalInfo) {
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ Ver4DictConstants::PROBABILITY_SIZE
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE;
} else {
return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
+ Ver4DictConstants::PROBABILITY_SIZE;
}
}
int ProbabilityDictContent::getEntryPos(const int terminalId) const {
return terminalId * getEntrySize();
}
bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry,
const int entryPos) {
BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer();
int writingPos = entryPos;
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(),
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) {
AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(),
Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) {
AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos);
return false;
}
if (mHasHistoricalInfo) {
const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo();
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(),
Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(),
Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos);
return false;
}
if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(),
Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) {
AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos);
return false;
}
}
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,74 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
class ProbabilityEntry;
class ProbabilityDictContent : public SingleDictContent {
public:
ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo,
const bool isUpdatable)
: SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable),
mHasHistoricalInfo(hasHistoricalInfo),
mSize(getBuffer()->getTailPosition() / getEntrySize()) {}
ProbabilityDictContent(const bool hasHistoricalInfo)
: mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {}
const ProbabilityEntry getProbabilityEntry(const int terminalId) const;
bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry);
bool flushToFile(const char *const dictPath) const;
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ProbabilityDictContent *const originalProbabilityDictContent);
private:
DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent);
int getEntrySize() const;
int getEntryPos(const int terminalId) const;
bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos);
bool mHasHistoricalInfo;
int mSize;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H */

View File

@ -0,0 +1,90 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h
*/
#ifndef LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
#define LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/historical_info.h"
namespace latinime {
namespace backward {
namespace v401 {
class ProbabilityEntry {
public:
ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
: mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
// Dummy entry
ProbabilityEntry()
: mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
// Entry without historical information
ProbabilityEntry(const int flags, const int probability)
: mFlags(flags), mProbability(probability), mHistoricalInfo() {}
// Entry with historical information.
ProbabilityEntry(const int flags, const int probability,
const HistoricalInfo *const historicalInfo)
: mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {}
const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const {
return ProbabilityEntry(mFlags, probability, &mHistoricalInfo);
}
const ProbabilityEntry createEntryWithUpdatedHistoricalInfo(
const HistoricalInfo *const historicalInfo) const {
return ProbabilityEntry(mFlags, mProbability, historicalInfo);
}
bool hasHistoricalInfo() const {
return mHistoricalInfo.isValid();
}
int getFlags() const {
return mFlags;
}
int getProbability() const {
return mProbability;
}
const HistoricalInfo *getHistoricalInfo() const {
return &mHistoricalInfo;
}
private:
// Copy constructor is public to use this class as a type of return value.
DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
const int mFlags;
const int mProbability;
const HistoricalInfo mHistoricalInfo;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H */

View File

@ -0,0 +1,199 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
bool *const outhasNext, int *const shortcutEntryPos) const {
const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer();
if (*shortcutEntryPos < 0 || *shortcutEntryPos >= shortcutListBuffer->getTailPosition()) {
AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d",
*shortcutEntryPos, shortcutListBuffer->getTailPosition());
ASSERT(false);
if (outhasNext) {
*outhasNext = false;
}
if (outCodePointCount) {
*outCodePointCount = 0;
}
return;
}
const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition(
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
if (outProbability) {
*outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK;
}
if (outhasNext) {
*outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
}
if (outCodePoint && outCodePointCount) {
shortcutListBuffer->readCodePointsAndAdvancePosition(
maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos);
}
}
int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const {
const SparseTable *const addressLookupTable = getAddressLookupTable();
if (!addressLookupTable->contains(terminalId)) {
return NOT_A_DICT_POS;
}
return addressLookupTable->get(terminalId);
}
bool ShortcutDictContent::flushToFile(const char *const dictPath) const {
return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_FILE_EXTENSION);
}
bool ShortcutDictContent::runGC(
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ShortcutDictContent *const originalShortcutDictContent) {
for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
it != terminalIdMap->end(); ++it) {
const int originalShortcutListPos =
originalShortcutDictContent->getShortcutListHeadPos(it->first);
if (originalShortcutListPos == NOT_A_DICT_POS) {
continue;
}
const int shortcutListPos = getContentBuffer()->getTailPosition();
// Copy shortcut list from original content.
if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent,
shortcutListPos)) {
AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d",
originalShortcutListPos, shortcutListPos);
return false;
}
// Set shortcut list position to the lookup table.
if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) {
AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d",
it->second, shortcutListPos);
return false;
}
}
return true;
}
bool ShortcutDictContent::createNewShortcutList(const int terminalId) {
const int shortcutListListPos = getContentBuffer()->getTailPosition();
return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos);
}
bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) {
return copyShortcutListFromDictContent(shortcutListPos, this, toPos);
}
bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos,
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) {
bool hasNext = true;
int readingPos = shortcutListPos;
int writingPos = toPos;
int codePoints[MAX_WORD_LENGTH];
while (hasNext) {
int probability = 0;
int codePointCount = 0;
sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH,
codePoints, &codePointCount, &probability, &hasNext, &readingPos);
if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability,
hasNext, &writingPos)) {
AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos);
return false;
}
}
return true;
}
bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) {
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
const int shortcutFlags = shortcutListBuffer->readUint(
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK;
const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext);
return shortcutListBuffer->writeUint(shortcutFlagsToWrite,
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos);
}
bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int probability, const bool hasNext,
int *const shortcutEntryPos) {
BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer();
const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext);
if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags,
Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) {
AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos);
return false;
}
if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount,
true /* writesTerminator */, shortcutEntryPos)) {
AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos);
return false;
}
return true;
}
// Find a shortcut entry that has specified target and return its position.
int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos,
const int *const targetCodePointsToFind, const int codePointCount) const {
bool hasNext = true;
int readingPos = shortcutListPos;
int targetCodePoints[MAX_WORD_LENGTH];
while (hasNext) {
const int entryPos = readingPos;
int probability = 0;
int targetCodePointCount = 0;
getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount,
&probability, &hasNext, &readingPos);
if (targetCodePointCount != codePointCount) {
continue;
}
bool matched = true;
for (int i = 0; i < codePointCount; ++i) {
if (targetCodePointsToFind[i] != targetCodePoints[i]) {
matched = false;
break;
}
}
if (matched) {
return entryPos;
}
}
return NOT_A_DICT_POS;
}
int ShortcutDictContent::createAndGetShortcutFlags(const int probability,
const bool hasNext) const {
return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK)
| (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0);
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,101 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
namespace latinime {
namespace backward {
namespace v401 {
class ShortcutDictContent : public SparseTableDictContent {
public:
ShortcutDictContent(const char *const dictPath, const bool isUpdatable)
: SparseTableDictContent(dictPath,
Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION,
Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
ShortcutDictContent()
: SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE,
Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {}
void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint,
int *const outCodePointCount, int *const outProbability, bool *const outhasNext,
const int shortcutEntryPos) {
int readingPos = shortcutEntryPos;
return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint,
outCodePointCount, outProbability, outhasNext, &readingPos);
}
void getShortcutEntryAndAdvancePosition(const int maxCodePointCount,
int *const outCodePoint, int *const outCodePointCount, int *const outProbability,
bool *const outhasNext, int *const shortcutEntryPos) const;
// Returns head position of shortcut list for a PtNode specified by terminalId.
int getShortcutListHeadPos(const int terminalId) const;
bool flushToFile(const char *const dictPath) const;
bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
const ShortcutDictContent *const originalShortcutDictContent);
bool createNewShortcutList(const int terminalId);
bool copyShortcutList(const int shortcutListPos, const int toPos);
bool setProbability(const int probability, const int shortcutEntryPos);
bool writeShortcutEntry(const int *const codePoint, const int codePointCount,
const int probability, const bool hasNext, const int shortcutEntryPos) {
int writingPos = shortcutEntryPos;
return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability,
hasNext, &writingPos);
}
bool writeShortcutEntryAndAdvancePosition(const int *const codePoint,
const int codePointCount, const int probability, const bool hasNext,
int *const shortcutEntryPos);
int findShortcutEntryAndGetPos(const int shortcutListPos,
const int *const targetCodePointsToFind, const int codePointCount) const;
private:
DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent);
bool copyShortcutListFromDictContent(const int shortcutListPos,
const ShortcutDictContent *const sourceShortcutDictContent, const int toPos);
int createAndGetShortcutFlags(const int probability, const bool hasNext) const;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H */

View File

@ -0,0 +1,86 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
class SingleDictContent : public DictContent {
public:
SingleDictContent(const char *const dictPath, const char *const contentFileName,
const bool isUpdatable)
: mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr,
mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mIsValid(mMmappedBuffer) {}
SingleDictContent()
: mMmappedBuffer(nullptr),
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {}
virtual ~SingleDictContent() {}
virtual bool isValid() const {
return mIsValid;
}
bool isNearSizeLimit() const {
return mExpandableContentBuffer.isNearSizeLimit();
}
protected:
BufferWithExtendableBuffer *getWritableBuffer() {
return &mExpandableContentBuffer;
}
const BufferWithExtendableBuffer *getBuffer() const {
return &mExpandableContentBuffer;
}
bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const {
return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
contentFileNameSuffix, &mExpandableContentBuffer);
}
private:
DISALLOW_COPY_AND_ASSIGN(SingleDictContent);
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer;
const bool mIsValid;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H */

View File

@ -0,0 +1,50 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h"
namespace latinime {
namespace backward {
namespace v401 {
bool SparseTableDictContent::flush(const char *const dictPath,
const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix,
const char *const contentFileNameSuffix) const {
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix,
&mExpandableLookupTableBuffer)){
return false;
}
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix,
&mExpandableAddressTableBuffer)) {
return false;
}
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix,
&mExpandableContentBuffer)) {
return false;
}
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,122 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h
*/
#ifndef LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
#define LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "suggest/policyimpl/dictionary/utils/sparse_table.h"
namespace latinime {
namespace backward {
namespace v401 {
// TODO: Support multiple contents.
class SparseTableDictContent : public DictContent {
public:
AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath,
const char *const lookupTableFileName, const char *const addressTableFileName,
const char *const contentFileName, const bool isUpdatable,
const int sparseTableBlockSize, const int sparseTableDataSize)
: mLookupTableBuffer(
MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)),
mAddressTableBuffer(
MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)),
mContentBuffer(
MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)),
mExpandableLookupTableBuffer(
mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr,
mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableAddressTableBuffer(
mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr,
mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr,
mContentBuffer ? mContentBuffer->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize),
mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {}
SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize)
: mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(),
mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer,
sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {}
virtual ~SparseTableDictContent() {}
virtual bool isValid() const {
return mIsValid;
}
bool isNearSizeLimit() const {
return mExpandableLookupTableBuffer.isNearSizeLimit()
|| mExpandableAddressTableBuffer.isNearSizeLimit()
|| mExpandableContentBuffer.isNearSizeLimit();
}
protected:
SparseTable *getUpdatableAddressLookupTable() {
return &mAddressLookupTable;
}
const SparseTable *getAddressLookupTable() const {
return &mAddressLookupTable;
}
BufferWithExtendableBuffer *getWritableContentBuffer() {
return &mExpandableContentBuffer;
}
const BufferWithExtendableBuffer *getContentBuffer() const {
return &mExpandableContentBuffer;
}
bool flush(const char *const dictDirPath, const char *const lookupTableFileName,
const char *const addressTableFileName, const char *const contentFileName) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent);
const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer;
const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer;
const MmappedBuffer::MmappedBufferPtr mContentBuffer;
BufferWithExtendableBuffer mExpandableLookupTableBuffer;
BufferWithExtendableBuffer mExpandableAddressTableBuffer;
BufferWithExtendableBuffer mExpandableContentBuffer;
SparseTable mAddressLookupTable;
const bool mIsValid;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H */

View File

@ -0,0 +1,111 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const {
if (terminalId < 0 || terminalId >= mSize) {
return NOT_A_DICT_POS;
}
const int terminalPos = getBuffer()->readUint(
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ?
NOT_A_DICT_POS : terminalPos;
}
bool TerminalPositionLookupTable::setTerminalPtNodePosition(
const int terminalId, const int terminalPtNodePos) {
if (terminalId < 0) {
return NOT_A_DICT_POS;
}
while (terminalId >= mSize) {
// Write new entry.
if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) {
return false;
}
mSize++;
}
const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ?
terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS;
return getWritableBuffer()->writeUint(terminalPos,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId));
}
bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const {
// If the used buffer size is smaller than the actual buffer size, regenerate the lookup
// table and write the new table to the file.
if (getEntryPos(mSize) < getBuffer()->getTailPosition()) {
TerminalPositionLookupTable lookupTableToWrite;
for (int i = 0; i < mSize; ++i) {
const int terminalPtNodePosition = getTerminalPtNodePosition(i);
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
AKLOGE("Cannot set terminal position to lookupTableToWrite."
" terminalId: %d, position: %d", i, terminalPtNodePosition);
return false;
}
}
return lookupTableToWrite.flush(dictPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
} else {
// We can simply use this lookup table because the buffer size has not been
// changed.
return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
}
}
bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) {
int removedEntryCount = 0;
int nextNewTerminalId = 0;
for (int i = 0; i < mSize; ++i) {
const int terminalPos = getBuffer()->readUint(
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i));
if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) {
// This entry is a garbage.
removedEntryCount++;
} else {
// Give a new terminal id to the entry.
if (!getWritableBuffer()->writeUint(terminalPos,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE,
getEntryPos(nextNewTerminalId))) {
return false;
}
// Memorize the mapping to the old terminal id to the new terminal id.
terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId));
nextNewTerminalId++;
}
}
mSize = nextNewTerminalId;
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,73 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h
*/
#ifndef LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
#define LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H
#include <unordered_map>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
namespace latinime {
namespace backward {
namespace v401 {
class TerminalPositionLookupTable : public SingleDictContent {
public:
typedef std::unordered_map<int, int> TerminalIdMap;
TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable)
: SingleDictContent(dictPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable),
mSize(getBuffer()->getTailPosition()
/ Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {}
TerminalPositionLookupTable() : mSize(0) {}
int getTerminalPtNodePosition(const int terminalId) const;
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos);
int getNextTerminalId() const {
return mSize;
}
bool flushToFile(const char *const dictPath) const;
bool runGCTerminalIds(TerminalIdMap *const terminalIdMap);
private:
DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable);
int getEntryPos(const int terminalId) const {
return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
}
int mSize;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif // LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H

View File

@ -0,0 +1,118 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
#define LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
namespace latinime {
namespace backward {
namespace v401 {
class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
public:
Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent,
const TerminalPositionLookupTable *const terminalPositionLookupTable)
: mShortcutDictContent(shortcutDictContent) {}
~Ver4ShortcutListPolicy() {}
int getStartPos(const int pos) const {
// The first shortcut entry is located at the head position of the shortcut list.
return pos;
}
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
int *const pos) const {
int probability = 0;
mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount,
outCodePoint, outCodePointCount, &probability, outHasNext, pos);
if (outIsWhitelist) {
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability);
}
}
void skipAllShortcuts(int *const pos) const {
// Do nothing because we don't need to skip shortcut lists in ver4 dictionaries.
}
bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount,
const int probability) {
const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
if (shortcutListPos == NOT_A_DICT_POS) {
// Create shortcut list.
if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
return false;
}
const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability,
false /* hasNext */, writingPos);
}
const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos,
codePoints, codePointCount);
if (entryPos == NOT_A_DICT_POS) {
// Add new entry to the shortcut list.
// Create new shortcut list.
if (!mShortcutDictContent->createNewShortcutList(terminalId)) {
AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId);
return false;
}
int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId);
if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints,
codePointCount, probability, true /* hasNext */, &writingPos)) {
AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId,
writingPos);
return false;
}
return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos);
}
// Overwrite existing entry.
bool hasNext = false;
mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */,
0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos);
if (!mShortcutDictContent->writeShortcutEntry(codePoints,
codePointCount, probability, hasNext, entryPos)) {
AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId,
entryPos);
return false;
}
return true;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy);
ShortcutDictContent *const mShortcutDictContent;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif // LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H

View File

@ -0,0 +1,155 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
#include <cerrno>
#include <cstring>
#include <sys/stat.h>
#include <sys/types.h>
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers(
const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer,
const FormatUtils::FORMAT_VERSION formatVersion) {
if (!headerBuffer) {
ASSERT(false);
AKLOGE("The header buffer must be valid to open ver4 dict buffers.");
return Ver4DictBuffersPtr(nullptr);
}
// TODO: take only dictDirPath, and open both header and trie files in the constructor below
const bool isUpdatable = headerBuffer->isUpdatable();
return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable,
formatVersion));
}
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
const BufferWithExtendableBuffer *const headerBuffer) const {
// Create temporary directory.
const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
char tmpDirPath[tmpDirPathBufSize];
FileUtils::getFilePathWithSuffix(dictDirPath,
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize,
tmpDirPath);
if (FileUtils::existsDir(tmpDirPath)) {
if (!FileUtils::removeDirAndFiles(tmpDirPath)) {
AKLOGE("Existing directory %s cannot be removed.", tmpDirPath);
ASSERT(false);
return false;
}
}
if (mkdir(tmpDirPath, S_IRWXU) == -1) {
AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno);
return false;
}
// Get dictionary base path.
const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */;
char dictName[dictNameBufSize];
FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName);
const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName);
char dictPath[dictPathBufSize];
FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath);
// Write header file.
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) {
AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath,
Ver4DictConstants::HEADER_FILE_EXTENSION);
return false;
}
// Write trie file.
if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) {
AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION);
return false;
}
// Write dictionary contents.
if (!mTerminalPositionLookupTable.flushToFile(dictPath)) {
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
return false;
}
if (!mProbabilityDictContent.flushToFile(dictPath)) {
AKLOGE("Probability dict content cannot be written. %s", tmpDirPath);
return false;
}
if (!mBigramDictContent.flushToFile(dictPath)) {
AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath);
return false;
}
if (!mShortcutDictContent.flushToFile(dictPath)) {
AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath);
return false;
}
// Remove existing dictionary.
if (!FileUtils::removeDirAndFiles(dictDirPath)) {
AKLOGE("Existing directory %s cannot be removed.", dictDirPath);
ASSERT(false);
return false;
}
// Rename temporary directory.
if (rename(tmpDirPath, dictDirPath) != 0) {
AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath);
ASSERT(false);
return false;
}
return true;
}
Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
const FormatUtils::FORMAT_VERSION formatVersion)
: mHeaderBuffer(std::move(headerBuffer)),
mDictBuffer(MmappedBuffer::openBuffer(dictPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)),
mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion),
mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr,
mHeaderPolicy.getSize(),
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr,
mDictBuffer ? mDictBuffer->getBufferSize() : 0,
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
mTerminalPositionLookupTable(dictPath, isUpdatable),
mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable),
mShortcutDictContent(dictPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(),
mIsUpdatable(true) {}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,152 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
#define LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H
#include <memory>
#include "defines.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
class Ver4DictBuffers {
public:
typedef std::unique_ptr<Ver4DictBuffers> Ver4DictBuffersPtr;
static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath,
MmappedBuffer::MmappedBufferPtr headerBuffer,
const FormatUtils::FORMAT_VERSION formatVersion);
static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers(
const HeaderPolicy *const headerPolicy, const int maxTrieSize) {
return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize));
}
AK_FORCE_INLINE bool isValid() const {
return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid()
&& mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid()
&& mBigramDictContent.isValid() && mShortcutDictContent.isValid();
}
AK_FORCE_INLINE bool isNearSizeLimit() const {
return mExpandableTrieBuffer.isNearSizeLimit()
|| mTerminalPositionLookupTable.isNearSizeLimit()
|| mProbabilityDictContent.isNearSizeLimit()
|| mBigramDictContent.isNearSizeLimit()
|| mShortcutDictContent.isNearSizeLimit();
}
AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const {
return &mHeaderPolicy;
}
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() {
return &mExpandableHeaderBuffer;
}
AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() {
return &mExpandableTrieBuffer;
}
AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
return &mExpandableTrieBuffer;
}
AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable;
}
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
return &mTerminalPositionLookupTable;
}
AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() {
return &mBigramDictContent;
}
AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const {
return &mBigramDictContent;
}
AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() {
return &mShortcutDictContent;
}
AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const {
return &mShortcutDictContent;
}
AK_FORCE_INLINE bool isUpdatable() const {
return mIsUpdatable;
}
bool flush(const char *const dictDirPath) const {
return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
}
bool flushHeaderAndDictBuffers(const char *const dictDirPath,
const BufferWithExtendableBuffer *const headerBuffer) const;
private:
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
Ver4DictBuffers(const char *const dictDirPath,
const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable,
const FormatUtils::FORMAT_VERSION formatVersion);
Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize);
const MmappedBuffer::MmappedBufferPtr mHeaderBuffer;
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mExpandableHeaderBuffer;
BufferWithExtendableBuffer mExpandableTrieBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent;
BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H */

View File

@ -0,0 +1,81 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
namespace latinime {
namespace backward {
namespace v401 {
// These values MUST match the definitions in FormatSpec.java.
const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie";
const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq";
// tat = Terminal Address Table
const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq";
const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq";
const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut";
const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup";
const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION =
".shortcut_index_shortcut";
// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
// Extended region size, which is not GCed region size in dict file + additional buffer size, is
// limited to 1MB to prevent from inefficient traversing.
const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
// invalid terminal ID in bigram lists.
const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
(1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80;
const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,84 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
#define LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H
#include "defines.h"
namespace latinime {
namespace backward {
namespace v401 {
// TODO: Create PtConstants under the pt_common and move some constant values there.
// Note that there are corresponding definitions in FormatSpec.java.
class Ver4DictConstants {
public:
static const char *const TRIE_FILE_EXTENSION;
static const char *const HEADER_FILE_EXTENSION;
static const char *const FREQ_FILE_EXTENSION;
static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION;
static const char *const BIGRAM_FILE_EXTENSION;
static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_FILE_EXTENSION;
static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION;
static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION;
static const int MAX_DICTIONARY_SIZE;
static const int MAX_DICT_EXTENDED_REGION_SIZE;
static const int NOT_A_TERMINAL_ID;
static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
static const int NOT_A_TERMINAL_ADDRESS;
static const int TERMINAL_ID_FIELD_SIZE;
static const int TIME_STAMP_FIELD_SIZE;
static const int WORD_LEVEL_FIELD_SIZE;
static const int WORD_COUNT_FIELD_SIZE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
static const int BIGRAM_FLAGS_FIELD_SIZE;
static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
static const int INVALID_BIGRAM_TARGET_TERMINAL_ID;
static const int BIGRAM_PROBABILITY_MASK;
static const int BIGRAM_HAS_NEXT_MASK;
// Used when bigram list has time stamp.
static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE;
static const int SHORTCUT_FLAGS_FIELD_SIZE;
static const int SHORTCUT_PROBABILITY_MASK;
static const int SHORTCUT_HAS_NEXT_MASK;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants);
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H */

View File

@ -0,0 +1,109 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int siblingNodePos) const {
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
ptNodePos, mBuffer->getTailPosition());
ASSERT(false);
return PtNodeParams();
}
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int pos = ptNodePos;
const int headPos = ptNodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPosOffset =
DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(
dictBuf, &pos);
const int parentPos =
DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
int terminalIdFieldPos = NOT_A_DICT_POS;
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
int probability = NOT_A_PROBABILITY;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
terminalIdFieldPos = pos;
if (usesAdditionalBuffer) {
terminalIdFieldPos += mBuffer->getOriginalBufferSize();
}
terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
const ProbabilityEntry probabilityEntry =
mProbabilityDictContent->getProbabilityEntry(terminalId);
if (probabilityEntry.hasHistoricalInfo()) {
probability = ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
} else {
probability = probabilityEntry.getProbability();
}
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {
childrenPosFieldPos += mBuffer->getOriginalBufferSize();
}
int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition(
dictBuf, &pos);
if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
childrenPos += mBuffer->getOriginalBufferSize();
}
if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize();
}
// Sibling position is the tail position of original PtNode.
int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos;
// Read destination node if the read node is a moved node.
if (DynamicPtReadingUtils::isMoved(flags)) {
// The destination position is stored at the same place as the parent position.
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
} else {
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
newSiblingNodePos);
}
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,79 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class HeaderPolicy;
namespace backward {
namespace v401 {
class ProbabilityDictContent;
/*
* This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
* node and reads node attributes including probability form probabilityBuffer.
*/
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
public:
Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const ProbabilityDictContent *const probabilityDictContent,
const HeaderPolicy *const headerPolicy)
: mBuffer(buffer), mProbabilityDictContent(probabilityDictContent),
mHeaderPolicy(headerPolicy) {}
~Ver4PatriciaTrieNodeReader() {}
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
NOT_A_DICT_POS /* siblingNodePos */);
}
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
const ProbabilityDictContent *const mProbabilityDictContent;
const HeaderPolicy *const mHeaderPolicy;
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos) const;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H */

View File

@ -0,0 +1,429 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
const PtNodeParams *const toBeUpdatedPtNodeParams) {
int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mTrieBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */, false /* willBecomeNonTerminal */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos)) {
return false;
}
if (toBeUpdatedPtNodeParams->isTerminal()) {
// The PtNode is a terminal. Delete entry from the terminal position lookup table.
return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */);
} else {
return true;
}
}
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos) {
int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mTrieBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */, false /* willBecomeNonTerminal */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
return false;
}
if (toBeUpdatedPtNodeParams->hasChildren()) {
// Update children's parent position.
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
while (!mReadingHelper.isEnd()) {
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(
mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
&parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
}
mReadingHelper.readNextSiblingNode(childPtNodeParams);
}
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams) {
int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mTrieBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
false /* isDeleted */, true /* willBecomeNonTerminal */);
if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) {
AKLOGE("Cannot update terminal position lookup table. terminal id: %d",
toBeUpdatedPtNodeParams->getTerminalId());
return false;
}
// Update flags.
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos);
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const UnigramProperty *const unigramProperty) {
// Update probability and historical information.
// TODO: Update other information in the unigram property.
if (!toBeUpdatedPtNodeParams->isTerminal()) {
return false;
}
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) {
if (!toBeUpdatedPtNodeParams->isTerminal()) {
AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode.");
return false;
}
const ProbabilityEntry originalProbabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId());
if (originalProbabilityEntry.hasHistoricalInfo()) {
const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy);
const ProbabilityEntry probabilityEntry =
originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo);
if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) {
AKLOGE("Cannot write updated probability entry. terminalId: %d",
toBeUpdatedPtNodeParams->getTerminalId());
return false;
}
const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy);
if (!isValid) {
if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonTerminal.");
return false;
}
}
*outNeedsToKeepPtNode = isValid;
} else {
// No need to update probability.
*outNeedsToKeepPtNode = true;
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
newChildrenPosition, &childrenPosFieldPos);
}
bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newTerminalId) {
return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE,
toBeUpdatedPtNodeParams->getTerminalIdFieldPos());
}
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */,
ptNodeWritingPos);
}
bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
int *const ptNodeWritingPos) {
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
ptNodeWritingPos)) {
return false;
}
// Write probability.
ProbabilityEntry newProbabilityEntry;
const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
&newProbabilityEntry, unigramProperty);
return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
&probabilityEntryToWrite);
}
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam,
const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) {
if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) {
AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d",
sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId());
return false;
}
if (!sourcePtNodeParams->hasBigrams()) {
// Update has bigrams flag.
return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(),
sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(),
sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(),
true /* hasBigrams */,
sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(),
targetPtNodeParam->getTerminalId());
}
bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) {
return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(
sourcePtNodeParams->getTerminalId(), outBigramEntryCount);
}
bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const DictPositionRelocationMap *const dictPositionRelocationMap,
int *const outBigramEntryCount) {
int parentPos = toBeUpdatedPtNodeParams->getParentPos();
if (parentPos != NOT_A_DICT_POS) {
PtNodeWriter::PtNodePositionRelocationMap::const_iterator it =
dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
parentPos = it->second;
}
}
int writingPos = toBeUpdatedPtNodeParams->getHeadPos()
+ DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE;
// Write updated parent offset.
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
return false;
}
// Updates children position.
int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos();
if (childrenPos != NOT_A_DICT_POS) {
PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it =
dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
childrenPos = it->second;
}
}
if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) {
return false;
}
// Counts bigram entries.
if (outBigramEntryCount) {
*outBigramEntryCount = mBigramPolicy->getBigramEntryConut(
toBeUpdatedPtNodeParams->getTerminalId());
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams,
const int *const targetCodePoints, const int targetCodePointCount,
const int shortcutProbability) {
if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(),
targetCodePoints, targetCodePointCount, shortcutProbability)) {
AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId());
return false;
}
if (!ptNodeParams->hasShortcutTargets()) {
// Update has shortcut targets flag.
return updatePtNodeFlags(ptNodeParams->getHeadPos(),
ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
ptNodeParams->isTerminal(), true /* hasShortcutTargets */,
ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
return true;
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags(
const PtNodeParams *const ptNodeParams) {
const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams->getTerminalId()) != NOT_A_DICT_POS;
return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(),
ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets,
hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos) {
const int nodePos = *ptNodeWritingPos;
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
// PtNode writing.
if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
0 /* nodeFlags */, ptNodeWritingPos)) {
return false;
}
// Calculate a parent offset and write the offset.
if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
return false;
}
// Write code points
if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
return false;
}
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
if (!ptNodeParams->willBecomeNonTerminal()) {
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
terminalId = ptNodeParams->getTerminalId();
} else if (ptNodeParams->isTerminal()) {
// Write terminal information using a new terminal id.
// Get a new unused terminal id.
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
}
}
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
if (isTerminal) {
// Update the lookup table.
if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition(
terminalId, nodePos)) {
return false;
}
// Write terminal Id.
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
return false;
}
if (outTerminalId) {
*outTerminalId = terminalId;
}
}
// Write children position
if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
return false;
}
return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(),
isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(),
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */);
}
const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
const UnigramProperty *const unigramProperty) const {
// TODO: Consolidate historical info and probability.
if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(),
unigramProperty->getLevel(), unigramProperty->getCount());
const HistoricalInfo updatedHistoricalInfo =
ForgettingCurveUtils::createUpdatedHistoricalInfo(
originalProbabilityEntry->getHistoricalInfo(),
unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy);
return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
&updatedHistoricalInfo);
} else {
return originalProbabilityEntry->createEntryWithUpdatedProbability(
unigramProperty->getProbability());
}
}
bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos,
const bool isBlacklisted, const bool isNotAWord, const bool isTerminal,
const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) {
// Create node flags and write them.
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal,
hasShortcutTargets, hasBigrams, hasMultipleChars,
CHILDREN_POSITION_FIELD_SIZE);
if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) {
AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos);
return false;
}
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,145 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class HeaderPolicy;
namespace backward {
namespace v401 {
class Ver4BigramListPolicy;
class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader;
class Ver4PtNodeArrayReader;
class Ver4ShortcutListPolicy;
/*
* This class is used for helping to writes nodes of ver4 patricia trie.
*/
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy,
const PtNodeReader *const ptNodeReader,
const PtNodeArrayReader *const ptNodeArrayReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy),
mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy),
mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos);
virtual bool markPtNodeAsWillBecomeNonTerminal(
const PtNodeParams *const toBeUpdatedPtNodeParams);
virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
const UnigramProperty *const unigramProperty);
virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newChildrenPosition);
bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newTerminalId);
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty,
bool *const outAddedNewBigram);
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam);
virtual bool updateAllBigramEntriesAndDeleteUselessEntries(
const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount);
virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams,
const DictPositionRelocationMap *const dictPositionRelocationMap,
int *const outBigramEntryCount);
virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams,
const int *const targetCodePoints, const int targetCodePointCount,
const int shortcutProbability);
bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
bool writePtNodeAndGetTerminalIdAndAdvancePosition(
const PtNodeParams *const ptNodeParams, int *const outTerminalId,
int *const ptNodeWritingPos);
// Create updated probability entry using given unigram property. In addition to the
// probability, this method updates historical information if needed.
// TODO: Update flags belonging to the unigram property.
const ProbabilityEntry createUpdatedEntryFrom(
const ProbabilityEntry *const originalProbabilityEntry,
const UnigramProperty *const unigramProperty) const;
bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
const bool hasMultipleChars);
static const int CHILDREN_POSITION_FIELD_SIZE;
BufferWithExtendableBuffer *const mTrieBuffer;
Ver4DictBuffers *const mBuffers;
const HeaderPolicy *const mHeaderPolicy;
DynamicPtReadingHelper mReadingHelper;
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View File

@ -0,0 +1,475 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h"
#include <vector>
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
// BinaryDictionaryDecayingTests.
const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
if (!ptNodeParams.isValid()) {
break;
}
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy->isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
}
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
readingHelper.readNextSiblingNode(ptNodeParams);
}
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
}
}
int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodePos(ptNodePos);
const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
maxCodePointCount, outCodePoints, outUnigramProbability);
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
}
return codePointCount;
}
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
const int ptNodePos =
readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
if (readingHelper.isError()) {
mIsCorrupted = true;
AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
}
return ptNodePos;
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
if (mHeaderPolicy->isDecayingDict()) {
// Both probabilities are encoded. Decode them and get probability.
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
} else {
if (unigramProbability == NOT_A_PROBABILITY) {
return NOT_A_PROBABILITY;
} else if (bigramProbability == NOT_A_PROBABILITY) {
return ProbabilityUtils::backoff(unigramProbability);
} else {
// bigramProbability is a bigram probability delta.
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
bigramProbability);
}
}
}
int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
ptNodeParams.getTerminalId());
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
return mBuffers->getBigramDictContent()->getBigramListHeadPos(
ptNodeParams.getTerminalId());
}
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer->getTailPosition());
return false;
}
if (length > MAX_WORD_LENGTH) {
AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
return false;
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
shortcut.getTargetCodePoints()->size());
return false;
}
}
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length,
unigramProperty, &addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
}
if (unigramProperty->getShortcuts().size() > 0) {
// Add shortcut target.
const int wordPos = getTerminalPtNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (wordPos == NOT_A_DICT_POS) {
AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
return false;
}
for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability());
return false;
}
}
}
return true;
} else {
return false;
}
}
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
const BigramProperty *const bigramProperty) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer->getTailPosition());
return false;
}
if (length0 > MAX_WORD_LENGTH
|| bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. "
"length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size());
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalPtNodePositionOfWord(
bigramProperty->getTargetCodePoints()->data(),
bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
bool addedNewBigram = false;
if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) {
if (addedNewBigram) {
mBigramCount++;
}
return true;
} else {
return false;
}
}
bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
const int *const word1, const int length1) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
mDictBuffer->getTailPosition());
return false;
}
if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) {
AKLOGE("Either src word or target word is too long to remove the bigram to from the "
"dictionary. length0: %d, length1: %d", length0, length1);
return false;
}
const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
return false;
}
const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (word1Pos == NOT_A_DICT_POS) {
return false;
}
if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) {
mBigramCount--;
return true;
} else {
return false;
}
}
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
return;
}
if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
AKLOGE("Cannot flush the dictionary to file.");
mIsCorrupted = true;
}
}
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
return;
}
if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
AKLOGE("Cannot flush the dictionary to file with GC.");
mIsCorrupted = true;
}
}
bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
if (mBuffers->isNearSizeLimit()) {
// Additional buffer size is near the limit.
return true;
} else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
> Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
// Total extended region size of the trie exceeds the limit.
return true;
} else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
&& mDictBuffer->getUsedAdditionalBufferSize() > 0) {
// Needs to reduce dictionary size.
return true;
} else if (mHeaderPolicy->isDecayingDict()) {
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
mHeaderPolicy);
}
return false;
}
void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
char *const outResult, const int maxResultLength) {
const int compareLength = queryLength + 1 /* terminator */;
if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mUnigramCount);
} else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d", mBigramCount);
} else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
ForgettingCurveUtils::getUnigramCountHardLimit(
mHeaderPolicy->getMaxUnigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
} else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
snprintf(outResult, maxResultLength, "%d",
mHeaderPolicy->isDecayingDict() ?
ForgettingCurveUtils::getBigramCountHardLimit(
mHeaderPolicy->getMaxBigramCount()) :
static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
}
}
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
const int codePointCount) const {
const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
false /* forceLowerCaseSearch */);
if (ptNodePos == NOT_A_DICT_POS) {
AKLOGE("getWordProperty is called for invalid word.");
return WordProperty();
}
const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
const ProbabilityEntry probabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(
ptNodeParams.getTerminalId());
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
// Fetch bigram information.
std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
if (bigramListPos != NOT_A_DICT_POS) {
int bigramWord1CodePoints[MAX_WORD_LENGTH];
const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
const TerminalPositionLookupTable *const terminalPositionLookupTable =
mBuffers->getTerminalPositionLookupTable();
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const BigramEntry bigramEntry =
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
const int word1TerminalId = bigramEntry.getTargetTerminalId();
const int word1TerminalPtNodePos =
terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
continue;
}
// Word (unigram) probability
int word1Probability = NOT_A_PROBABILITY;
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
&word1Probability);
const std::vector<int> word1(bigramWord1CodePoints,
bigramWord1CodePoints + codePointCount);
const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
getProbability(word1Probability, bigramEntry.getProbability());
bigrams.emplace_back(&word1, probability,
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount());
}
}
// Fetch shortcut information.
std::vector<UnigramProperty::ShortcutProperty> shortcuts;
int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
if (shortcutPos != NOT_A_DICT_POS) {
int shortcutTarget[MAX_WORD_LENGTH];
const ShortcutDictContent *const shortcutDictContent =
mBuffers->getShortcutDictContent();
bool hasNext = true;
while (hasNext) {
int shortcutTargetLength = 0;
int shortcutProbability = NOT_A_PROBABILITY;
shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
&shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
shortcuts.emplace_back(&target, shortcutProbability);
}
}
const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(),
ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
historicalInfo->getCount(), &shortcuts);
return WordProperty(&codePointVector, &unigramProperty, &bigrams);
}
int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) {
// TODO: Return code point count like other methods.
// Null termination.
outCodePoints[0] = 0;
if (token == 0) {
mTerminalPtNodePositionsForIteratingWords.clear();
DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
&mTerminalPtNodePositionsForIteratingWords);
DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
}
const int terminalPtNodePositionsVectorSize =
static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
AKLOGE("Given token %d is invalid.", token);
return 0;
}
const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
int unigramProbability = NOT_A_PROBABILITY;
const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
if (codePointCount < MAX_WORD_LENGTH) {
// Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH
// code points.
outCodePoints[codePointCount] = 0;
}
const int nextToken = token + 1;
if (nextToken >= terminalPtNodePositionsVectorSize) {
// All words have been iterated.
mTerminalPtNodePositionsForIteratingWords.clear();
return 0;
}
return nextToken;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,168 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
* Do not edit this file other than updating policy's interface.
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H
#include <vector>
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class DicNode;
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class DicNodeVector;
namespace backward {
namespace v401 {
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
: mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
mDictBuffer(mBuffers->getWritableTrieBuffer()),
mBigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable()),
mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
mPtNodeArrayReader(mDictBuffer),
mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
&mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy->getUnigramCount()),
mBigramCount(mHeaderPolicy->getBigramCount()),
mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getProbability(const int unigramProbability, const int bigramProbability) const;
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy;
}
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutPolicy;
}
bool addUnigramWord(const int *const word, const int length,
const UnigramProperty *const unigramProperty);
bool addBigramWords(const int *const word0, const int length0,
const BigramProperty *const bigramProperty);
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
const int length1);
void flush(const char *const filePath);
void flushWithGC(const char *const filePath);
bool needsToRunGC(const bool mindsBlockByGC) const;
void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength);
const WordProperty getWordProperty(const int *const codePoints,
const int codePointCount) const;
int getNextWordAndNextToken(const int token, int *const outCodePoints);
bool isCorrupted() const {
return mIsCorrupted;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
static const char *const UNIGRAM_COUNT_QUERY;
static const char *const BIGRAM_COUNT_QUERY;
static const char *const MAX_UNIGRAM_COUNT_QUERY;
static const char *const MAX_BIGRAM_COUNT_QUERY;
// When the dictionary size is near the maximum size, we have to refuse dynamic operations to
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mDictBuffer;
Ver4BigramListPolicy mBigramPolicy;
Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader;
Ver4PtNodeArrayReader mPtNodeArrayReader;
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPtUpdatingHelper mUpdatingHelper;
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif // LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H

View File

@ -0,0 +1,39 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(
const uint8_t *const buffer, int *pos) {
return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos);
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,52 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H
#include <cstdint>
#include "defines.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
namespace v401 {
class Ver4PatriciaTrieReadingUtils {
public:
static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer,
int *const pos);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils);
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H */

View File

@ -0,0 +1,301 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h"
#include <cstring>
#include <queue>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
namespace backward {
namespace v401 {
bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
const int unigramCount, const int bigramCount) const {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
AKLOGE("Cannot write header structure to buffer. "
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
"extendedRegionSize: %d", false, unigramCount, bigramCount,
extendedRegionSize);
return false;
}
return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
}
bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const dictDirPath) {
const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
Ver4DictConstants::MAX_DICTIONARY_SIZE));
int unigramCount = 0;
int bigramCount = 0;
if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
return false;
}
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
return false;
}
return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
}
bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
int *const outUnigramCount, int *const outBigramCount) {
Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
mBuffers->getProbabilityDictContent(), headerPolicy);
Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
mBuffers->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
mBuffers->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
&shortcutPolicy);
DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
.getValidUnigramCount();
const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
maxUnigramCount);
return false;
}
}
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
traversePolicyToUpdateBigramProbability(&ptNodeWriter);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateBigramProbability)) {
return false;
}
const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
const int maxBigramCount = headerPolicy->getMaxBigramCount();
if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
if (!truncateBigrams(maxBigramCount)) {
AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
return false;
}
}
// Mapping from positions in mBuffer to positions in bufferToWrite.
PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
&shortcutPolicy);
DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
return false;
}
// Create policy instances for the GCed dictionary.
Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
buffersToWrite->getProbabilityDictContent(), headerPolicy);
Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
buffersToWrite->getTerminalPositionLookupTable());
Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
&newShortcutPolicy);
// Re-assign terminal IDs for valid terminal PtNodes.
TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
&terminalIdMap)) {
return false;
}
// Run GC for probability dict content.
if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
mBuffers->getProbabilityDictContent())) {
return false;
}
// Run GC for bigram dict content.
if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
mBuffers->getBigramDictContent(), outBigramCount)) {
return false;
}
// Run GC for shortcut dict content.
if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
mBuffers->getShortcutDictContent())) {
return false;
}
DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
&traversePolicyToUpdateAllPositionFields)) {
return false;
}
newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
return false;
}
*outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
return true;
}
bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
const TerminalPositionLookupTable *const terminalPosLookupTable =
mBuffers->getTerminalPositionLookupTable();
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
priorityQueue;
for (int i = 0; i < nextTerminalId; ++i) {
const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
if (terminalPos == NOT_A_DICT_POS) {
continue;
}
const ProbabilityEntry probabilityEntry =
mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
probabilityEntry.getProbability();
priorityQueue.push(DictProbability(terminalPos, probability,
probabilityEntry.getHistoricalInfo()->getTimeStamp()));
}
// Delete unigrams.
while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
const int ptNodePos = priorityQueue.top().getDictPos();
const PtNodeParams ptNodeParams =
ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
return false;
}
priorityQueue.pop();
}
return true;
}
bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
const TerminalPositionLookupTable *const terminalPosLookupTable =
mBuffers->getTerminalPositionLookupTable();
const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
priorityQueue;
BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
for (int i = 0; i < nextTerminalId; ++i) {
const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
if (bigramListPos == NOT_A_DICT_POS) {
continue;
}
bool hasNext = true;
int readingPos = bigramListPos;
while (hasNext) {
const int entryPos = readingPos;
const BigramEntry bigramEntry =
bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
hasNext = bigramEntry.hasNext();
if (!bigramEntry.isValid()) {
continue;
}
const int probability = bigramEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(
bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
bigramEntry.getProbability();
priorityQueue.push(DictProbability(entryPos, probability,
bigramEntry.getHistoricalInfo()->getTimeStamp()));
}
}
// Delete bigrams.
while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
const int entryPos = priorityQueue.top().getDictPos();
const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
return false;
}
priorityQueue.pop();
}
return true;
}
bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
if (!ptNodeParams->isTerminal()) {
return true;
}
TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
mTerminalIdMap->find(ptNodeParams->getTerminalId());
if (it == mTerminalIdMap->end()) {
AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
ptNodeParams->getTerminalId(), mTerminalIdMap->size());
return false;
}
if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
}
return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,140 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h"
#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class HeaderPolicy;
namespace backward {
namespace v401 {
class Ver4DictBuffers;
class Ver4PatriciaTrieNodeReader;
class Ver4PatriciaTrieNodeWriter;
class Ver4PatriciaTrieWritingHelper {
public:
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {}
bool writeToDictFile(const char *const dictDirPath, const int unigramCount,
const int bigramCount) const;
// This method cannot be const because the original dictionary buffer will be updated to detect
// useless PtNodes during GC.
bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
: public DynamicPtReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(
Ver4PatriciaTrieNodeWriter *const ptNodeWriter,
const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap)
: mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {}
bool onAscend() { return true; }
bool onDescend(const int ptNodeArrayPos) { return true; }
bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds);
Ver4PatriciaTrieNodeWriter *const mPtNodeWriter;
const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap;
};
// For truncateUnigrams() and truncateBigrams().
class DictProbability {
public:
DictProbability(const int dictPos, const int probability, const int timestamp)
: mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {}
int getDictPos() const {
return mDictPos;
}
int getProbability() const {
return mProbability;
}
int getTimestamp() const {
return mTimestamp;
}
private:
DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability);
int mDictPos;
int mProbability;
int mTimestamp;
};
// For truncateUnigrams() and truncateBigrams().
class DictProbabilityComparator {
public:
bool operator()(const DictProbability &left, const DictProbability &right) {
if (left.getProbability() != right.getProbability()) {
return left.getProbability() > right.getProbability();
}
if (left.getTimestamp() != right.getTimestamp()) {
return left.getTimestamp() < right.getTimestamp();
}
return left.getDictPos() > right.getDictPos();
}
private:
DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator);
};
bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount,
int *const outBigramCount);
bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount);
bool truncateBigrams(const int maxBigramCount);
Ver4DictBuffers *const mBuffers;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H */

View File

@ -0,0 +1,90 @@
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp
*/
#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
namespace backward {
namespace v401 {
bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of a bug or a broken dictionary.
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
ptNodeArrayPos, mBuffer->getTailPosition());
ASSERT(false);
return false;
}
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int readingPos = ptNodeArrayPos;
if (usesAdditionalBuffer) {
readingPos -= mBuffer->getOriginalBufferSize();
}
const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
dictBuf, &readingPos);
if (usesAdditionalBuffer) {
readingPos += mBuffer->getOriginalBufferSize();
}
if (ptNodeCountInArray < 0) {
AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray);
return false;
}
*outPtNodeCount = ptNodeCountInArray;
*outFirstPtNodePos = readingPos;
return true;
}
bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
int *const outNextPtNodeArrayPos) const {
if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
forwordLinkPos, mBuffer->getTailPosition());
ASSERT(false);
return false;
}
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
int readingPos = forwordLinkPos;
if (usesAdditionalBuffer) {
readingPos -= mBuffer->getOriginalBufferSize();
}
const int nextPtNodeArrayOffset =
DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos);
if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) {
*outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset;
} else {
*outNextPtNodeArrayPos = NOT_A_DICT_POS;
}
return true;
}
} // namespace v401
} // namespace backward
} // namespace latinime

View File

@ -0,0 +1,57 @@
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* !!!!! DO NOT EDIT THIS FILE !!!!!
*
* This file was generated from
* suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h
*/
#ifndef LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
#define LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
namespace latinime {
namespace backward {
namespace v401 {
} // namespace v401
} // namespace backward
class BufferWithExtendableBuffer;
namespace backward {
namespace v401 {
class Ver4PtNodeArrayReader : public PtNodeArrayReader {
public:
Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {};
virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
int *const outPtNodeCount, int *const outFirstPtNodePos) const;
virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
int *const outNextPtNodeArrayPos) const;
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader);
const BufferWithExtendableBuffer *const mBuffer;
};
} // namespace v401
} // namespace backward
} // namespace latinime
#endif /* LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H */