From 6bf268132d60061fd26bd8cba63a12b56b22056e Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 14 May 2014 19:09:01 +0900 Subject: [PATCH] Generate dict code for version 401. Bug: 13406708 Change-Id: I769ac84d54d997e7aefab0c9c16727455a132e0b --- native/jni/CleanupNativeFileList.mk | 1 + native/jni/NativeFileList.mk | 21 + .../structure/backward/v401/Readme.txt | 1 + .../v401/bigram/ver4_bigram_list_policy.cpp | 290 +++++++++++ .../v401/bigram/ver4_bigram_list_policy.h | 93 ++++ .../v401/content/bigram_dict_content.cpp | 224 +++++++++ .../v401/content/bigram_dict_content.h | 122 +++++ .../backward/v401/content/bigram_entry.h | 110 ++++ .../backward/v401/content/dict_content.h | 47 ++ .../v401/content/probability_dict_content.cpp | 171 +++++++ .../v401/content/probability_dict_content.h | 74 +++ .../backward/v401/content/probability_entry.h | 90 ++++ .../v401/content/shortcut_dict_content.cpp | 199 ++++++++ .../v401/content/shortcut_dict_content.h | 101 ++++ .../v401/content/single_dict_content.h | 86 ++++ .../content/sparse_table_dict_content.cpp | 50 ++ .../v401/content/sparse_table_dict_content.h | 122 +++++ .../terminal_position_lookup_table.cpp | 111 ++++ .../content/terminal_position_lookup_table.h | 73 +++ .../v401/shortcut/ver4_shortcut_list_policy.h | 118 +++++ .../backward/v401/ver4_dict_buffers.cpp | 155 ++++++ .../backward/v401/ver4_dict_buffers.h | 152 ++++++ .../backward/v401/ver4_dict_constants.cpp | 81 +++ .../backward/v401/ver4_dict_constants.h | 84 ++++ .../v401/ver4_patricia_trie_node_reader.cpp | 109 ++++ .../v401/ver4_patricia_trie_node_reader.h | 79 +++ .../v401/ver4_patricia_trie_node_writer.cpp | 429 ++++++++++++++++ .../v401/ver4_patricia_trie_node_writer.h | 145 ++++++ .../v401/ver4_patricia_trie_policy.cpp | 475 ++++++++++++++++++ .../backward/v401/ver4_patricia_trie_policy.h | 168 +++++++ .../v401/ver4_patricia_trie_reading_utils.cpp | 39 ++ .../v401/ver4_patricia_trie_reading_utils.h | 52 ++ .../ver4_patricia_trie_writing_helper.cpp | 301 +++++++++++ .../v401/ver4_patricia_trie_writing_helper.h | 140 ++++++ .../v401/ver4_pt_node_array_reader.cpp | 90 ++++ .../backward/v401/ver4_pt_node_array_reader.h | 57 +++ 36 files changed, 4660 insertions(+) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h diff --git a/native/jni/CleanupNativeFileList.mk b/native/jni/CleanupNativeFileList.mk index 1738f8c58..eed6f1e63 100644 --- a/native/jni/CleanupNativeFileList.mk +++ b/native/jni/CleanupNativeFileList.mk @@ -13,6 +13,7 @@ # limitations under the License. LATIN_IME_CORE_SRC_FILES := +LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 := LATIN_IME_CORE_TEST_FILES := LATIN_IME_JNI_SRC_FILES := LATIN_IME_SRC_DIR := diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index 6ccfec911..33be86b52 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -98,6 +98,27 @@ LATIN_IME_CORE_SRC_FILES := \ log_utils.cpp \ time_keeper.cpp) +LATIN_IME_CORE_SRC_FILES_BACKWARD_V401 := \ + $(addprefix suggest/policyimpl/dictionary/structure/backward/v401/, \ + ver4_dict_buffers.cpp \ + ver4_dict_constants.cpp \ + ver4_patricia_trie_node_reader.cpp \ + ver4_patricia_trie_node_writer.cpp \ + ver4_patricia_trie_policy.cpp \ + ver4_patricia_trie_reading_utils.cpp \ + ver4_patricia_trie_writing_helper.cpp \ + ver4_pt_node_array_reader.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/backward/v401/content/, \ + bigram_dict_content.cpp \ + probability_dict_content.cpp \ + shortcut_dict_content.cpp \ + sparse_table_dict_content.cpp \ + terminal_position_lookup_table.cpp) \ + $(addprefix suggest/policyimpl/dictionary/structure/backward/v401/bigram/, \ + ver4_bigram_list_policy.cpp) + +LATIN_IME_CORE_SRC_FILES += $(LATIN_IME_CORE_SRC_FILES_BACKWARD_V401) + LATIN_IME_CORE_TEST_FILES := \ defines_test.cpp \ suggest/core/layout/normal_distribution_2d_test.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt new file mode 100644 index 000000000..9e29e836c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/Readme.txt @@ -0,0 +1 @@ +Files under this directory have been auto generated. diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp new file mode 100644 index 000000000..7ad072f09 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! + * Do not edit this file other than updating policy's interface. + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" + +#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( + bigramEntry.getTargetTerminalId()); + } + if (outProbability) { + if (bigramEntry.hasHistoricalInfo()) { + *outProbability = + ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(), + mHeaderPolicy); + } else { + *outProbability = bigramEntry.getProbability(); + } + } + if (outHasNext) { + *outHasNext = bigramEntry.hasNext(); + } +} + +bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) { + // 1. The word has no bigrams yet. + // 2. The word has bigrams, and there is the target in the list. + // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. + // 4. The word has bigrams. We have to append new bigram entry to the list. + // 5. Same as 4, but the list is the last entry of the content file. + if (outAddedNewEntry) { + *outAddedNewEntry = false; + } + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Case 1. PtNode that doesn't have a bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + newTargetTerminalId); + const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, + bigramProperty); + // Write an entry. + const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + return true; + } + + int tailEntryPos = NOT_A_DICT_POS; + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, + &tailEntryPos); + if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { + // Case 4, 5. + // Add new entry to the bigram list. + if (tailEntryPos == NOT_A_DICT_POS) { + // Case 4. Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); + // Copy existing bigram list. + if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { + return false; + } + } + // Write new entry at the tail position of the bigram content. + const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + newTargetTerminalId); + const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( + &newBigramEntry, bigramProperty); + if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { + return false; + } + // Update has next flag of the tail entry. + if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + return true; + } + + // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. + const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (!originalBigramEntry.isValid()) { + // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing + // entry is updated. + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + } + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); + const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( + &updatedBigramEntry, bigramProperty); + return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); +} + +bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist. + return false; + } + const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, + nullptr /* outTailEntryPos */); + if (entryPosToUpdate == NOT_A_DICT_POS) { + // Bigram entry doesn't exist. + return false; + } + const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); + if (targetTerminalId != bigramEntry.getTargetTerminalId()) { + // Bigram entry doesn't exist. + return false; + } + // Remove bigram entry by marking it as invalid entry and overwriting the original entry. + const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); +} + +bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, + int *const outBigramCount) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist. + return true; + } + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const int entryPos = readingPos; + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (!bigramEntry.isValid()) { + continue; + } + const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( + bigramEntry.getTargetTerminalId()); + if (targetPtNodePos == NOT_A_DICT_POS) { + // Invalidate bigram entry. + const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { + return false; + } + } else if (bigramEntry.hasHistoricalInfo()) { + const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( + bigramEntry.getHistoricalInfo(), mHeaderPolicy); + if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) { + const BigramEntry updatedBigramEntry = + bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { + return false; + } + *outBigramCount += 1; + } else { + // Remove entry. + const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); + if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { + return false; + } + } + } else { + *outBigramCount += 1; + } + } + return true; +} + +int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Bigram list doesn't exist. + return 0; + } + int bigramCount = 0; + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (bigramEntry.isValid()) { + bigramCount++; + } + } + return bigramCount; +} + +int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, + const int bigramListPos, int *const outTailEntryPos) const { + if (outTailEntryPos) { + *outTailEntryPos = NOT_A_DICT_POS; + } + bool hasNext = true; + int invalidEntryPos = NOT_A_DICT_POS; + int readingPos = bigramListPos; + while (hasNext) { + const int entryPos = readingPos; + const BigramEntry bigramEntry = + mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { + // Entry with same target is found. + return entryPos; + } else if (!bigramEntry.isValid()) { + // Invalid entry that can be reused is found. + invalidEntryPos = entryPos; + } + if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { + if (outTailEntryPos) { + *outTailEntryPos = entryPos; + } + } + } + return invalidEntryPos; +} + +const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( + const BigramEntry *const originalBigramEntry, + const BigramProperty *const bigramProperty) const { + // TODO: Consolidate historical info and probability. + if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(), + bigramProperty->getLevel(), bigramProperty->getCount()); + const HistoricalInfo updatedHistoricalInfo = + ForgettingCurveUtils::createUpdatedHistoricalInfo( + originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(), + &historicalInfoForUpdate, mHeaderPolicy); + return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); + } else { + return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability()); + } +} + +bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) { + const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos); + const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext); + return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h new file mode 100644 index 000000000..adf687bac --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! + * Do not edit this file other than updating policy's interface. + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H +#define LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_bigrams_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class BigramDictContent; +} // namespace v401 +} // namespace backward +class BigramProperty; +namespace backward { +namespace v401 { +} // namespace v401 +} // namespace backward +class HeaderPolicy; +namespace backward { +namespace v401 { +class TerminalPositionLookupTable; + +class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { + public: + Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, + const TerminalPositionLookupTable *const terminalPositionLookupTable, + const HeaderPolicy *const headerPolicy) + : mBigramDictContent(bigramDictContent), + mTerminalPositionLookupTable(terminalPositionLookupTable), + mHeaderPolicy(headerPolicy) {} + + void getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const; + + void skipAllBigrams(int *const pos) const { + // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. + } + + bool addNewEntry(const int terminalId, const int newTargetTerminalId, + const BigramProperty *const bigramProperty, bool *const outAddedNewEntry); + + bool removeEntry(const int terminalId, const int targetTerminalId); + + bool updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, + int *const outBigramCount); + + int getBigramEntryConut(const int terminalId); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); + + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos, + int *const outTailEntryPos) const; + + const BigramEntry createUpdatedBigramEntryFrom(const BigramEntry *const originalBigramEntry, + const BigramProperty *const bigramProperty) const; + + bool updateHasNextFlag(const bool hasNext, const int bigramEntryPos); + + BigramDictContent *const mBigramDictContent; + const TerminalPositionLookupTable *const mTerminalPositionLookupTable; + const HeaderPolicy *const mHeaderPolicy; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_BIGRAM_LIST_POLICY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp new file mode 100644 index 000000000..1e53ff94a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.cpp @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" + +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( + int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + if (*bigramEntryPos < 0 || *bigramEntryPos >= bigramListBuffer->getTailPosition()) { + AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bufSize: %d", + *bigramEntryPos, bigramListBuffer->getTailPosition()); + ASSERT(false); + return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, + Ver4DictConstants::NOT_A_TERMINAL_ID); + } + const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); + const bool hasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + int probability = NOT_A_PROBABILITY; + int timestamp = NOT_A_TIMESTAMP; + int level = 0; + int count = 0; + if (mHasHistoricalInfo) { + probability = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); + timestamp = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); + level = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos); + count = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); + } else { + probability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + } + const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + const int targetTerminalId = + (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? + Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; + if (mHasHistoricalInfo) { + const HistoricalInfo historicalInfo(timestamp, level, count); + return BigramEntry(hasNext, probability, &historicalInfo, targetTerminalId); + } else { + return BigramEntry(hasNext, probability, targetTerminalId); + } +} + +bool BigramDictContent::writeBigramEntryAndAdvancePosition( + const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { + BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); + const int bigramFlags = createAndGetBigramFlags( + mHasHistoricalInfo ? 0 : bigramEntryToWrite->getProbability(), + bigramEntryToWrite->hasNext()); + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); + return false; + } + if (mHasHistoricalInfo) { + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramEntryToWrite->getProbability(), + Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, + bigramEntryToWrite->getProbability()); + return false; + } + const HistoricalInfo *const historicalInfo = bigramEntryToWrite->getHistoricalInfo(); + if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, + historicalInfo->getTimeStamp()); + return false; + } + if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getLevel(), + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos, + historicalInfo->getLevel()); + return false; + } + if (!bigramListBuffer->writeUintAndAdvancePosition(historicalInfo->getCount(), + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, + historicalInfo->getCount()); + return false; + } + } + const int targetTerminalIdToWrite = + (bigramEntryToWrite->getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) ? + Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : + bigramEntryToWrite->getTargetTerminalId(); + if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d", + *entryWritingPos, bigramEntryToWrite->getTargetTerminalId()); + return false; + } + return true; +} + +bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos, + int *const outTailEntryPos) { + int readingPos = bigramListPos; + int writingPos = toPos; + bool hasNext = true; + while (hasNext) { + const BigramEntry bigramEntry = getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (!hasNext) { + *outTailEntryPos = writingPos; + } + if (!writeBigramEntryAndAdvancePosition(&bigramEntry, &writingPos)) { + AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); + return false; + } + } + return true; +} + +bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const BigramDictContent *const originalBigramDictContent, + int *const outBigramEntryCount) { + for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); + it != terminalIdMap->end(); ++it) { + const int originalBigramListPos = + originalBigramDictContent->getBigramListHeadPos(it->first); + if (originalBigramListPos == NOT_A_DICT_POS) { + // This terminal does not have a bigram list. + continue; + } + const int bigramListPos = getContentBuffer()->getTailPosition(); + int bigramEntryCount = 0; + // Copy bigram list with GC from original content. + if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos, + terminalIdMap, &bigramEntryCount)) { + AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d", + originalBigramListPos, bigramListPos); + return false; + } + if (bigramEntryCount == 0) { + // All bigram entries are useless. This terminal does not have a bigram list. + continue; + } + *outBigramEntryCount += bigramEntryCount; + // Set bigram list position to the lookup table. + if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) { + AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d", + it->second, bigramListPos); + return false; + } + } + return true; +} + +// Returns whether GC for the bigram list was succeeded or not. +bool BigramDictContent::runGCBigramList(const int bigramListPos, + const BigramDictContent *const sourceBigramDictContent, const int toPos, + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + int *const outEntrycount) { + bool hasNext = true; + int readingPos = bigramListPos; + int writingPos = toPos; + int lastEntryPos = NOT_A_DICT_POS; + while (hasNext) { + const BigramEntry originalBigramEntry = + sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = originalBigramEntry.hasNext(); + if (originalBigramEntry.getTargetTerminalId() == Ver4DictConstants::NOT_A_TERMINAL_ID) { + continue; + } + TerminalPositionLookupTable::TerminalIdMap::const_iterator it = + terminalIdMap->find(originalBigramEntry.getTargetTerminalId()); + if (it == terminalIdMap->end()) { + // Target word has been removed. + continue; + } + lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS; + const BigramEntry updatedBigramEntry = + originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second); + if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) { + AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); + return false; + } + *outEntrycount += 1; + } + if (lastEntryPos != NOT_A_DICT_POS) { + // Update has next flag in the last written entry. + const BigramEntry bigramEntry = getBigramEntry(lastEntryPos).updateHasNextAndGetEntry( + false /* hasNext */); + if (!writeBigramEntry(&bigramEntry, lastEntryPos)) { + AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos); + return false; + } + } + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h new file mode 100644 index 000000000..f9c474b4a --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class BigramDictContent : public SparseTableDictContent { + public: + BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo, + const bool isUpdatable) + : SparseTableDictContent(dictPath, + Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mHasHistoricalInfo(hasHistoricalInfo) {} + + BigramDictContent(const bool hasHistoricalInfo) + : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), + mHasHistoricalInfo(hasHistoricalInfo) {} + + const BigramEntry getBigramEntry(const int bigramEntryPos) const { + int readingPos = bigramEntryPos; + return getBigramEntryAndAdvancePosition(&readingPos); + } + + const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; + + // Returns head position of bigram list for a PtNode specified by terminalId. + int getBigramListHeadPos(const int terminalId) const { + const SparseTable *const addressLookupTable = getAddressLookupTable(); + if (!addressLookupTable->contains(terminalId)) { + return NOT_A_DICT_POS; + } + return addressLookupTable->get(terminalId); + } + + bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) { + int writingPos = getContentBuffer()->getTailPosition(); + return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); + } + + bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { + int writingPos = entryWritingPos; + return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); + } + + bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, + int *const entryWritingPos); + + bool createNewBigramList(const int terminalId) { + const int bigramListPos = getContentBuffer()->getTailPosition(); + return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); + } + + bool copyBigramList(const int bigramListPos, const int toPos, int *const outTailEntryPos); + + bool flushToFile(const char *const dictPath) const { + return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION); + } + + bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const BigramDictContent *const originalBigramDictContent, + int *const outBigramEntryCount); + + bool isContentTailPos(const int pos) const { + return pos == getContentBuffer()->getTailPosition(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BigramDictContent); + + int createAndGetBigramFlags(const int probability, const bool hasNext) const { + return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) + | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); + } + + bool runGCBigramList(const int bigramListPos, + const BigramDictContent *const sourceBigramDictContent, const int toPos, + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + int *const outEntryCount); + + bool mHasHistoricalInfo; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h new file mode 100644 index 000000000..82c4b53a8 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_entry.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h + */ + +#ifndef LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H +#define LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/historical_info.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class BigramEntry { + public: + BigramEntry(const BigramEntry& bigramEntry) + : mHasNext(bigramEntry.mHasNext), mProbability(bigramEntry.mProbability), + mHistoricalInfo(), mTargetTerminalId(bigramEntry.mTargetTerminalId) {} + + // Entry with historical information. + BigramEntry(const bool hasNext, const int probability, const int targetTerminalId) + : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(), + mTargetTerminalId(targetTerminalId) {} + + // Entry with historical information. + BigramEntry(const bool hasNext, const int probability, + const HistoricalInfo *const historicalInfo, const int targetTerminalId) + : mHasNext(hasNext), mProbability(probability), mHistoricalInfo(*historicalInfo), + mTargetTerminalId(targetTerminalId) {} + + const BigramEntry getInvalidatedEntry() const { + return updateTargetTerminalIdAndGetEntry(Ver4DictConstants::NOT_A_TERMINAL_ID); + } + + const BigramEntry updateHasNextAndGetEntry(const bool hasNext) const { + return BigramEntry(hasNext, mProbability, &mHistoricalInfo, mTargetTerminalId); + } + + const BigramEntry updateTargetTerminalIdAndGetEntry(const int newTargetTerminalId) const { + return BigramEntry(mHasNext, mProbability, &mHistoricalInfo, newTargetTerminalId); + } + + const BigramEntry updateProbabilityAndGetEntry(const int probability) const { + return BigramEntry(mHasNext, probability, &mHistoricalInfo, mTargetTerminalId); + } + + const BigramEntry updateHistoricalInfoAndGetEntry( + const HistoricalInfo *const historicalInfo) const { + return BigramEntry(mHasNext, mProbability, historicalInfo, mTargetTerminalId); + } + + bool isValid() const { + return mTargetTerminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; + } + + bool hasNext() const { + return mHasNext; + } + + int getProbability() const { + return mProbability; + } + + bool hasHistoricalInfo() const { + return mHistoricalInfo.isValid(); + } + + const HistoricalInfo *getHistoricalInfo() const { + return &mHistoricalInfo; + } + + int getTargetTerminalId() const { + return mTargetTerminalId; + } + + private: + // Copy constructor is public to use this class as a type of return value. + DISALLOW_DEFAULT_CONSTRUCTOR(BigramEntry); + DISALLOW_ASSIGNMENT_OPERATOR(BigramEntry); + + const bool mHasNext; + const int mProbability; + const HistoricalInfo mHistoricalInfo; + const int mTargetTerminalId; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_BIGRAM_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h new file mode 100644 index 000000000..39e29001c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_DICT_CONTENT_H + +#include "defines.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class DictContent { + public: + virtual ~DictContent() {} + virtual bool isValid() const = 0; + + protected: + DictContent() {} + + private: + DISALLOW_COPY_AND_ASSIGN(DictContent); +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp new file mode 100644 index 000000000..337b97c05 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +const ProbabilityEntry ProbabilityDictContent::getProbabilityEntry(const int terminalId) const { + if (terminalId < 0 || terminalId >= mSize) { + // This method can be called with invalid terminal id during GC. + return ProbabilityEntry(0 /* flags */, NOT_A_PROBABILITY); + } + const BufferWithExtendableBuffer *const buffer = getBuffer(); + int entryPos = getEntryPos(terminalId); + const int flags = buffer->readUintAndAdvancePosition( + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &entryPos); + const int probability = buffer->readUintAndAdvancePosition( + Ver4DictConstants::PROBABILITY_SIZE, &entryPos); + if (mHasHistoricalInfo) { + const int timestamp = buffer->readUintAndAdvancePosition( + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &entryPos); + const int level = buffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &entryPos); + const int count = buffer->readUintAndAdvancePosition( + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &entryPos); + const HistoricalInfo historicalInfo(timestamp, level, count); + return ProbabilityEntry(flags, probability, &historicalInfo); + } else { + return ProbabilityEntry(flags, probability); + } +} + +bool ProbabilityDictContent::setProbabilityEntry(const int terminalId, + const ProbabilityEntry *const probabilityEntry) { + if (terminalId < 0) { + return false; + } + const int entryPos = getEntryPos(terminalId); + if (terminalId >= mSize) { + ProbabilityEntry dummyEntry; + // Write new entry. + int writingPos = getBuffer()->getTailPosition(); + while (writingPos <= entryPos) { + // Fulfilling with dummy entries until writingPos. + if (!writeEntry(&dummyEntry, writingPos)) { + AKLOGE("Cannot write dummy entry. pos: %d, mSize: %d", writingPos, mSize); + return false; + } + writingPos += getEntrySize(); + mSize++; + } + } + return writeEntry(probabilityEntry, entryPos); +} + +bool ProbabilityDictContent::flushToFile(const char *const dictPath) const { + if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { + ProbabilityDictContent probabilityDictContentToWrite(mHasHistoricalInfo); + for (int i = 0; i < mSize; ++i) { + const ProbabilityEntry probabilityEntry = getProbabilityEntry(i); + if (!probabilityDictContentToWrite.setProbabilityEntry(i, &probabilityEntry)) { + AKLOGE("Cannot set probability entry in flushToFile. terminalId: %d", i); + return false; + } + } + return probabilityDictContentToWrite.flush(dictPath, + Ver4DictConstants::FREQ_FILE_EXTENSION); + } else { + return flush(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION); + } +} + +bool ProbabilityDictContent::runGC( + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const ProbabilityDictContent *const originalProbabilityDictContent) { + mSize = 0; + for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); + it != terminalIdMap->end(); ++it) { + const ProbabilityEntry probabilityEntry = + originalProbabilityDictContent->getProbabilityEntry(it->first); + if (!setProbabilityEntry(it->second, &probabilityEntry)) { + AKLOGE("Cannot set probability entry in runGC. terminalId: %d", it->second); + return false; + } + mSize++; + } + return true; +} + +int ProbabilityDictContent::getEntrySize() const { + if (mHasHistoricalInfo) { + return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE + + Ver4DictConstants::TIME_STAMP_FIELD_SIZE + + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE + + Ver4DictConstants::WORD_COUNT_FIELD_SIZE; + } else { + return Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE; + } +} + +int ProbabilityDictContent::getEntryPos(const int terminalId) const { + return terminalId * getEntrySize(); +} + +bool ProbabilityDictContent::writeEntry(const ProbabilityEntry *const probabilityEntry, + const int entryPos) { + BufferWithExtendableBuffer *const bufferToWrite = getWritableBuffer(); + int writingPos = entryPos; + if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getFlags(), + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &writingPos)) { + AKLOGE("Cannot write flags in probability dict content. pos: %d", writingPos); + return false; + } + if (!bufferToWrite->writeUintAndAdvancePosition(probabilityEntry->getProbability(), + Ver4DictConstants::PROBABILITY_SIZE, &writingPos)) { + AKLOGE("Cannot write probability in probability dict content. pos: %d", writingPos); + return false; + } + if (mHasHistoricalInfo) { + const HistoricalInfo *const historicalInfo = probabilityEntry->getHistoricalInfo(); + if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getTimeStamp(), + Ver4DictConstants::TIME_STAMP_FIELD_SIZE, &writingPos)) { + AKLOGE("Cannot write timestamp in probability dict content. pos: %d", writingPos); + return false; + } + if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getLevel(), + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, &writingPos)) { + AKLOGE("Cannot write level in probability dict content. pos: %d", writingPos); + return false; + } + if (!bufferToWrite->writeUintAndAdvancePosition(historicalInfo->getCount(), + Ver4DictConstants::WORD_COUNT_FIELD_SIZE, &writingPos)) { + AKLOGE("Cannot write count in probability dict content. pos: %d", writingPos); + return false; + } + } + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h new file mode 100644 index 000000000..db3070994 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class ProbabilityEntry; + +class ProbabilityDictContent : public SingleDictContent { + public: + ProbabilityDictContent(const char *const dictPath, const bool hasHistoricalInfo, + const bool isUpdatable) + : SingleDictContent(dictPath, Ver4DictConstants::FREQ_FILE_EXTENSION, isUpdatable), + mHasHistoricalInfo(hasHistoricalInfo), + mSize(getBuffer()->getTailPosition() / getEntrySize()) {} + + ProbabilityDictContent(const bool hasHistoricalInfo) + : mHasHistoricalInfo(hasHistoricalInfo), mSize(0) {} + + const ProbabilityEntry getProbabilityEntry(const int terminalId) const; + + bool setProbabilityEntry(const int terminalId, const ProbabilityEntry *const probabilityEntry); + + bool flushToFile(const char *const dictPath) const; + + bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const ProbabilityDictContent *const originalProbabilityDictContent); + + private: + DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); + + int getEntrySize() const; + + int getEntryPos(const int terminalId) const; + + bool writeEntry(const ProbabilityEntry *const probabilityEntry, const int entryPos); + + bool mHasHistoricalInfo; + int mSize; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_PROBABILITY_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h new file mode 100644 index 000000000..d341e7b07 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h + */ + +#ifndef LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H +#define LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/historical_info.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class ProbabilityEntry { + public: + ProbabilityEntry(const ProbabilityEntry &probabilityEntry) + : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability), + mHistoricalInfo(probabilityEntry.mHistoricalInfo) {} + + // Dummy entry + ProbabilityEntry() + : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {} + + // Entry without historical information + ProbabilityEntry(const int flags, const int probability) + : mFlags(flags), mProbability(probability), mHistoricalInfo() {} + + // Entry with historical information. + ProbabilityEntry(const int flags, const int probability, + const HistoricalInfo *const historicalInfo) + : mFlags(flags), mProbability(probability), mHistoricalInfo(*historicalInfo) {} + + const ProbabilityEntry createEntryWithUpdatedProbability(const int probability) const { + return ProbabilityEntry(mFlags, probability, &mHistoricalInfo); + } + + const ProbabilityEntry createEntryWithUpdatedHistoricalInfo( + const HistoricalInfo *const historicalInfo) const { + return ProbabilityEntry(mFlags, mProbability, historicalInfo); + } + + bool hasHistoricalInfo() const { + return mHistoricalInfo.isValid(); + } + + int getFlags() const { + return mFlags; + } + + int getProbability() const { + return mProbability; + } + + const HistoricalInfo *getHistoricalInfo() const { + return &mHistoricalInfo; + } + + private: + // Copy constructor is public to use this class as a type of return value. + DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry); + + const int mFlags; + const int mProbability; + const HistoricalInfo mHistoricalInfo; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_PROBABILITY_ENTRY_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp new file mode 100644 index 000000000..3214807ad --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.cpp @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" + +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +void ShortcutDictContent::getShortcutEntryAndAdvancePosition(const int maxCodePointCount, + int *const outCodePoint, int *const outCodePointCount, int *const outProbability, + bool *const outhasNext, int *const shortcutEntryPos) const { + const BufferWithExtendableBuffer *const shortcutListBuffer = getContentBuffer(); + if (*shortcutEntryPos < 0 || *shortcutEntryPos >= shortcutListBuffer->getTailPosition()) { + AKLOGE("Invalid shortcut entry position. shortcutEntryPos: %d, bufSize: %d", + *shortcutEntryPos, shortcutListBuffer->getTailPosition()); + ASSERT(false); + if (outhasNext) { + *outhasNext = false; + } + if (outCodePointCount) { + *outCodePointCount = 0; + } + return; + } + + const int shortcutFlags = shortcutListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); + if (outProbability) { + *outProbability = shortcutFlags & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK; + } + if (outhasNext) { + *outhasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK; + } + if (outCodePoint && outCodePointCount) { + shortcutListBuffer->readCodePointsAndAdvancePosition( + maxCodePointCount, outCodePoint, outCodePointCount, shortcutEntryPos); + } +} + +int ShortcutDictContent::getShortcutListHeadPos(const int terminalId) const { + const SparseTable *const addressLookupTable = getAddressLookupTable(); + if (!addressLookupTable->contains(terminalId)) { + return NOT_A_DICT_POS; + } + return addressLookupTable->get(terminalId); +} + +bool ShortcutDictContent::flushToFile(const char *const dictPath) const { + return flush(dictPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_FILE_EXTENSION); +} + +bool ShortcutDictContent::runGC( + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const ShortcutDictContent *const originalShortcutDictContent) { + for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); + it != terminalIdMap->end(); ++it) { + const int originalShortcutListPos = + originalShortcutDictContent->getShortcutListHeadPos(it->first); + if (originalShortcutListPos == NOT_A_DICT_POS) { + continue; + } + const int shortcutListPos = getContentBuffer()->getTailPosition(); + // Copy shortcut list from original content. + if (!copyShortcutListFromDictContent(originalShortcutListPos, originalShortcutDictContent, + shortcutListPos)) { + AKLOGE("Cannot copy shortcut list during GC. original pos: %d, pos: %d", + originalShortcutListPos, shortcutListPos); + return false; + } + // Set shortcut list position to the lookup table. + if (!getUpdatableAddressLookupTable()->set(it->second, shortcutListPos)) { + AKLOGE("Cannot set shortcut list position. terminal id: %d, pos: %d", + it->second, shortcutListPos); + return false; + } + } + return true; +} + +bool ShortcutDictContent::createNewShortcutList(const int terminalId) { + const int shortcutListListPos = getContentBuffer()->getTailPosition(); + return getUpdatableAddressLookupTable()->set(terminalId, shortcutListListPos); +} + +bool ShortcutDictContent::copyShortcutList(const int shortcutListPos, const int toPos) { + return copyShortcutListFromDictContent(shortcutListPos, this, toPos); +} + +bool ShortcutDictContent::copyShortcutListFromDictContent(const int shortcutListPos, + const ShortcutDictContent *const sourceShortcutDictContent, const int toPos) { + bool hasNext = true; + int readingPos = shortcutListPos; + int writingPos = toPos; + int codePoints[MAX_WORD_LENGTH]; + while (hasNext) { + int probability = 0; + int codePointCount = 0; + sourceShortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, + codePoints, &codePointCount, &probability, &hasNext, &readingPos); + if (!writeShortcutEntryAndAdvancePosition(codePoints, codePointCount, probability, + hasNext, &writingPos)) { + AKLOGE("Cannot write shortcut entry to copy. pos: %d", writingPos); + return false; + } + } + return true; +} + +bool ShortcutDictContent::setProbability(const int probability, const int shortcutEntryPos) { + BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer(); + const int shortcutFlags = shortcutListBuffer->readUint( + Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); + const bool hasNext = shortcutFlags & Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK; + const int shortcutFlagsToWrite = createAndGetShortcutFlags(probability, hasNext); + return shortcutListBuffer->writeUint(shortcutFlagsToWrite, + Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos); +} + +bool ShortcutDictContent::writeShortcutEntryAndAdvancePosition(const int *const codePoint, + const int codePointCount, const int probability, const bool hasNext, + int *const shortcutEntryPos) { + BufferWithExtendableBuffer *const shortcutListBuffer = getWritableContentBuffer(); + const int shortcutFlags = createAndGetShortcutFlags(probability, hasNext); + if (!shortcutListBuffer->writeUintAndAdvancePosition(shortcutFlags, + Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE, shortcutEntryPos)) { + AKLOGE("Cannot write shortcut flags. flags; %x, pos: %d", shortcutFlags, *shortcutEntryPos); + return false; + } + if (!shortcutListBuffer->writeCodePointsAndAdvancePosition(codePoint, codePointCount, + true /* writesTerminator */, shortcutEntryPos)) { + AKLOGE("Cannot write shortcut target code points. pos: %d", *shortcutEntryPos); + return false; + } + return true; +} + +// Find a shortcut entry that has specified target and return its position. +int ShortcutDictContent::findShortcutEntryAndGetPos(const int shortcutListPos, + const int *const targetCodePointsToFind, const int codePointCount) const { + bool hasNext = true; + int readingPos = shortcutListPos; + int targetCodePoints[MAX_WORD_LENGTH]; + while (hasNext) { + const int entryPos = readingPos; + int probability = 0; + int targetCodePointCount = 0; + getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, targetCodePoints, &targetCodePointCount, + &probability, &hasNext, &readingPos); + if (targetCodePointCount != codePointCount) { + continue; + } + bool matched = true; + for (int i = 0; i < codePointCount; ++i) { + if (targetCodePointsToFind[i] != targetCodePoints[i]) { + matched = false; + break; + } + } + if (matched) { + return entryPos; + } + } + return NOT_A_DICT_POS; +} + +int ShortcutDictContent::createAndGetShortcutFlags(const int probability, + const bool hasNext) const { + return (probability & Ver4DictConstants::SHORTCUT_PROBABILITY_MASK) + | (hasNext ? Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK : 0); +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h new file mode 100644 index 000000000..75fd4f3b2 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class ShortcutDictContent : public SparseTableDictContent { + public: + ShortcutDictContent(const char *const dictPath, const bool isUpdatable) + : SparseTableDictContent(dictPath, + Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_FILE_EXTENSION, isUpdatable, + Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + + ShortcutDictContent() + : SparseTableDictContent(Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE, + Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE) {} + + void getShortcutEntry(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, int *const outProbability, bool *const outhasNext, + const int shortcutEntryPos) { + int readingPos = shortcutEntryPos; + return getShortcutEntryAndAdvancePosition(maxCodePointCount, outCodePoint, + outCodePointCount, outProbability, outhasNext, &readingPos); + } + + void getShortcutEntryAndAdvancePosition(const int maxCodePointCount, + int *const outCodePoint, int *const outCodePointCount, int *const outProbability, + bool *const outhasNext, int *const shortcutEntryPos) const; + + // Returns head position of shortcut list for a PtNode specified by terminalId. + int getShortcutListHeadPos(const int terminalId) const; + + bool flushToFile(const char *const dictPath) const; + + bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, + const ShortcutDictContent *const originalShortcutDictContent); + + bool createNewShortcutList(const int terminalId); + + bool copyShortcutList(const int shortcutListPos, const int toPos); + + bool setProbability(const int probability, const int shortcutEntryPos); + + bool writeShortcutEntry(const int *const codePoint, const int codePointCount, + const int probability, const bool hasNext, const int shortcutEntryPos) { + int writingPos = shortcutEntryPos; + return writeShortcutEntryAndAdvancePosition(codePoint, codePointCount, probability, + hasNext, &writingPos); + } + + bool writeShortcutEntryAndAdvancePosition(const int *const codePoint, + const int codePointCount, const int probability, const bool hasNext, + int *const shortcutEntryPos); + + int findShortcutEntryAndGetPos(const int shortcutListPos, + const int *const targetCodePointsToFind, const int codePointCount) const; + + private: + DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent); + + bool copyShortcutListFromDictContent(const int shortcutListPos, + const ShortcutDictContent *const sourceShortcutDictContent, const int toPos); + + int createAndGetShortcutFlags(const int probability, const bool hasNext) const; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_SHORTCUT_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h new file mode 100644 index 000000000..a519cd835 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class SingleDictContent : public DictContent { + public: + SingleDictContent(const char *const dictPath, const char *const contentFileName, + const bool isUpdatable) + : mMmappedBuffer(MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), + mExpandableContentBuffer(mMmappedBuffer ? mMmappedBuffer->getBuffer() : nullptr, + mMmappedBuffer ? mMmappedBuffer->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mIsValid(mMmappedBuffer) {} + + SingleDictContent() + : mMmappedBuffer(nullptr), + mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mIsValid(true) {} + + virtual ~SingleDictContent() {} + + virtual bool isValid() const { + return mIsValid; + } + + bool isNearSizeLimit() const { + return mExpandableContentBuffer.isNearSizeLimit(); + } + + protected: + BufferWithExtendableBuffer *getWritableBuffer() { + return &mExpandableContentBuffer; + } + + const BufferWithExtendableBuffer *getBuffer() const { + return &mExpandableContentBuffer; + } + + bool flush(const char *const dictPath, const char *const contentFileNameSuffix) const { + return DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, + contentFileNameSuffix, &mExpandableContentBuffer); + } + + private: + DISALLOW_COPY_AND_ASSIGN(SingleDictContent); + + const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; + BufferWithExtendableBuffer mExpandableContentBuffer; + const bool mIsValid; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_SINGLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp new file mode 100644 index 000000000..638132c3d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h" + +namespace latinime { +namespace backward { +namespace v401 { + +bool SparseTableDictContent::flush(const char *const dictPath, + const char *const lookupTableFileNameSuffix, const char *const addressTableFileNameSuffix, + const char *const contentFileNameSuffix) const { + if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, lookupTableFileNameSuffix, + &mExpandableLookupTableBuffer)){ + return false; + } + if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, addressTableFileNameSuffix, + &mExpandableAddressTableBuffer)) { + return false; + } + if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, contentFileNameSuffix, + &mExpandableContentBuffer)) { + return false; + } + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h new file mode 100644 index 000000000..b95de2eda --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/sparse_table_dict_content.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h + */ + +#ifndef LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H +#define LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" +#include "suggest/policyimpl/dictionary/utils/sparse_table.h" + +namespace latinime { +namespace backward { +namespace v401 { + +// TODO: Support multiple contents. +class SparseTableDictContent : public DictContent { + public: + AK_FORCE_INLINE SparseTableDictContent(const char *const dictPath, + const char *const lookupTableFileName, const char *const addressTableFileName, + const char *const contentFileName, const bool isUpdatable, + const int sparseTableBlockSize, const int sparseTableDataSize) + : mLookupTableBuffer( + MmappedBuffer::openBuffer(dictPath, lookupTableFileName, isUpdatable)), + mAddressTableBuffer( + MmappedBuffer::openBuffer(dictPath, addressTableFileName, isUpdatable)), + mContentBuffer( + MmappedBuffer::openBuffer(dictPath, contentFileName, isUpdatable)), + mExpandableLookupTableBuffer( + mLookupTableBuffer ? mLookupTableBuffer->getBuffer() : nullptr, + mLookupTableBuffer ? mLookupTableBuffer->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableAddressTableBuffer( + mAddressTableBuffer ? mAddressTableBuffer->getBuffer() : nullptr, + mAddressTableBuffer ? mAddressTableBuffer->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableContentBuffer(mContentBuffer ? mContentBuffer->getBuffer() : nullptr, + mContentBuffer ? mContentBuffer->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, + sparseTableBlockSize, sparseTableDataSize), + mIsValid(mLookupTableBuffer && mAddressTableBuffer && mContentBuffer) {} + + SparseTableDictContent(const int sparseTableBlockSize, const int sparseTableDataSize) + : mLookupTableBuffer(), mAddressTableBuffer(), mContentBuffer(), + mExpandableLookupTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableAddressTableBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableContentBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mAddressLookupTable(&mExpandableLookupTableBuffer, &mExpandableAddressTableBuffer, + sparseTableBlockSize, sparseTableDataSize), mIsValid(true) {} + + virtual ~SparseTableDictContent() {} + + virtual bool isValid() const { + return mIsValid; + } + + bool isNearSizeLimit() const { + return mExpandableLookupTableBuffer.isNearSizeLimit() + || mExpandableAddressTableBuffer.isNearSizeLimit() + || mExpandableContentBuffer.isNearSizeLimit(); + } + + protected: + SparseTable *getUpdatableAddressLookupTable() { + return &mAddressLookupTable; + } + + const SparseTable *getAddressLookupTable() const { + return &mAddressLookupTable; + } + + BufferWithExtendableBuffer *getWritableContentBuffer() { + return &mExpandableContentBuffer; + } + + const BufferWithExtendableBuffer *getContentBuffer() const { + return &mExpandableContentBuffer; + } + + bool flush(const char *const dictDirPath, const char *const lookupTableFileName, + const char *const addressTableFileName, const char *const contentFileName) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); + + const MmappedBuffer::MmappedBufferPtr mLookupTableBuffer; + const MmappedBuffer::MmappedBufferPtr mAddressTableBuffer; + const MmappedBuffer::MmappedBufferPtr mContentBuffer; + BufferWithExtendableBuffer mExpandableLookupTableBuffer; + BufferWithExtendableBuffer mExpandableAddressTableBuffer; + BufferWithExtendableBuffer mExpandableContentBuffer; + SparseTable mAddressLookupTable; + const bool mIsValid; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_SPARSE_TABLE_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp new file mode 100644 index 000000000..ab8a3ae43 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +int TerminalPositionLookupTable::getTerminalPtNodePosition(const int terminalId) const { + if (terminalId < 0 || terminalId >= mSize) { + return NOT_A_DICT_POS; + } + const int terminalPos = getBuffer()->readUint( + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId)); + return (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) ? + NOT_A_DICT_POS : terminalPos; +} + +bool TerminalPositionLookupTable::setTerminalPtNodePosition( + const int terminalId, const int terminalPtNodePos) { + if (terminalId < 0) { + return NOT_A_DICT_POS; + } + while (terminalId >= mSize) { + // Write new entry. + if (!getWritableBuffer()->writeUint(Ver4DictConstants::NOT_A_TERMINAL_ADDRESS, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(mSize))) { + return false; + } + mSize++; + } + const int terminalPos = (terminalPtNodePos != NOT_A_DICT_POS) ? + terminalPtNodePos : Ver4DictConstants::NOT_A_TERMINAL_ADDRESS; + return getWritableBuffer()->writeUint(terminalPos, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(terminalId)); +} + +bool TerminalPositionLookupTable::flushToFile(const char *const dictPath) const { + // If the used buffer size is smaller than the actual buffer size, regenerate the lookup + // table and write the new table to the file. + if (getEntryPos(mSize) < getBuffer()->getTailPosition()) { + TerminalPositionLookupTable lookupTableToWrite; + for (int i = 0; i < mSize; ++i) { + const int terminalPtNodePosition = getTerminalPtNodePosition(i); + if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) { + AKLOGE("Cannot set terminal position to lookupTableToWrite." + " terminalId: %d, position: %d", i, terminalPtNodePosition); + return false; + } + } + return lookupTableToWrite.flush(dictPath, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } else { + // We can simply use this lookup table because the buffer size has not been + // changed. + return flush(dictPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } +} + +bool TerminalPositionLookupTable::runGCTerminalIds(TerminalIdMap *const terminalIdMap) { + int removedEntryCount = 0; + int nextNewTerminalId = 0; + for (int i = 0; i < mSize; ++i) { + const int terminalPos = getBuffer()->readUint( + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, getEntryPos(i)); + if (terminalPos == Ver4DictConstants::NOT_A_TERMINAL_ADDRESS) { + // This entry is a garbage. + removedEntryCount++; + } else { + // Give a new terminal id to the entry. + if (!getWritableBuffer()->writeUint(terminalPos, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, + getEntryPos(nextNewTerminalId))) { + return false; + } + // Memorize the mapping to the old terminal id to the new terminal id. + terminalIdMap->insert(TerminalIdMap::value_type(i, nextNewTerminalId)); + nextNewTerminalId++; + } + } + mSize = nextNewTerminalId; + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h new file mode 100644 index 000000000..dbf0e6088 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h + */ + +#ifndef LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H +#define LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H + +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/single_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class TerminalPositionLookupTable : public SingleDictContent { + public: + typedef std::unordered_map TerminalIdMap; + + TerminalPositionLookupTable(const char *const dictPath, const bool isUpdatable) + : SingleDictContent(dictPath, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION, isUpdatable), + mSize(getBuffer()->getTailPosition() + / Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE) {} + + TerminalPositionLookupTable() : mSize(0) {} + + int getTerminalPtNodePosition(const int terminalId) const; + + bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos); + + int getNextTerminalId() const { + return mSize; + } + + bool flushToFile(const char *const dictPath) const; + + bool runGCTerminalIds(TerminalIdMap *const terminalIdMap); + + private: + DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); + + int getEntryPos(const int terminalId) const { + return terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + } + + int mSize; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif // LATINIME_BACKWARD_V401_TERMINAL_POSITION_LOOKUP_TABLE_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h new file mode 100644 index 000000000..6a4e83c0d --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! + * Do not edit this file other than updating policy's interface. + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/shortcut/ver4_shortcut_list_policy.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H +#define LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H + +#include "defines.h" +#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/shortcut/shortcut_list_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class Ver4ShortcutListPolicy : public DictionaryShortcutsStructurePolicy { + public: + Ver4ShortcutListPolicy(ShortcutDictContent *const shortcutDictContent, + const TerminalPositionLookupTable *const terminalPositionLookupTable) + : mShortcutDictContent(shortcutDictContent) {} + + ~Ver4ShortcutListPolicy() {} + + int getStartPos(const int pos) const { + // The first shortcut entry is located at the head position of the shortcut list. + return pos; + } + + void getNextShortcut(const int maxCodePointCount, int *const outCodePoint, + int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext, + int *const pos) const { + int probability = 0; + mShortcutDictContent->getShortcutEntryAndAdvancePosition(maxCodePointCount, + outCodePoint, outCodePointCount, &probability, outHasNext, pos); + if (outIsWhitelist) { + *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(probability); + } + } + + void skipAllShortcuts(int *const pos) const { + // Do nothing because we don't need to skip shortcut lists in ver4 dictionaries. + } + + bool addNewShortcut(const int terminalId, const int *const codePoints, const int codePointCount, + const int probability) { + const int shortcutListPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); + if (shortcutListPos == NOT_A_DICT_POS) { + // Create shortcut list. + if (!mShortcutDictContent->createNewShortcutList(terminalId)) { + AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId); + return false; + } + const int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); + return mShortcutDictContent->writeShortcutEntry(codePoints, codePointCount, probability, + false /* hasNext */, writingPos); + } + const int entryPos = mShortcutDictContent->findShortcutEntryAndGetPos(shortcutListPos, + codePoints, codePointCount); + if (entryPos == NOT_A_DICT_POS) { + // Add new entry to the shortcut list. + // Create new shortcut list. + if (!mShortcutDictContent->createNewShortcutList(terminalId)) { + AKLOGE("Cannot create new shortcut list. terminal id: %d", terminalId); + return false; + } + int writingPos = mShortcutDictContent->getShortcutListHeadPos(terminalId); + if (!mShortcutDictContent->writeShortcutEntryAndAdvancePosition(codePoints, + codePointCount, probability, true /* hasNext */, &writingPos)) { + AKLOGE("Cannot write shortcut entry. terminal id: %d, pos: %d", terminalId, + writingPos); + return false; + } + return mShortcutDictContent->copyShortcutList(shortcutListPos, writingPos); + } + // Overwrite existing entry. + bool hasNext = false; + mShortcutDictContent->getShortcutEntry(MAX_WORD_LENGTH, 0 /* outCodePoint */, + 0 /* outCodePointCount */ , 0 /* probability */, &hasNext, entryPos); + if (!mShortcutDictContent->writeShortcutEntry(codePoints, + codePointCount, probability, hasNext, entryPos)) { + AKLOGE("Cannot overwrite shortcut entry. terminal id: %d, pos: %d", terminalId, + entryPos); + return false; + } + return true; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4ShortcutListPolicy); + + ShortcutDictContent *const mShortcutDictContent; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif // LATINIME_BACKWARD_V401_VER4_SHORTCUT_LIST_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp new file mode 100644 index 000000000..55ead01a9 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.cpp @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" + +#include +#include +#include +#include + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +/* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( + const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion) { + if (!headerBuffer) { + ASSERT(false); + AKLOGE("The header buffer must be valid to open ver4 dict buffers."); + return Ver4DictBuffersPtr(nullptr); + } + // TODO: take only dictDirPath, and open both header and trie files in the constructor below + const bool isUpdatable = headerBuffer->isUpdatable(); + return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable, + formatVersion)); +} + +bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, + const BufferWithExtendableBuffer *const headerBuffer) const { + // Create temporary directory. + const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath, + DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + char tmpDirPath[tmpDirPathBufSize]; + FileUtils::getFilePathWithSuffix(dictDirPath, + DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize, + tmpDirPath); + if (FileUtils::existsDir(tmpDirPath)) { + if (!FileUtils::removeDirAndFiles(tmpDirPath)) { + AKLOGE("Existing directory %s cannot be removed.", tmpDirPath); + ASSERT(false); + return false; + } + } + if (mkdir(tmpDirPath, S_IRWXU) == -1) { + AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); + return false; + } + // Get dictionary base path. + const int dictNameBufSize = strlen(dictDirPath) + 1 /* terminator */; + char dictName[dictNameBufSize]; + FileUtils::getBasename(dictDirPath, dictNameBufSize, dictName); + const int dictPathBufSize = FileUtils::getFilePathBufSize(tmpDirPath, dictName); + char dictPath[dictPathBufSize]; + FileUtils::getFilePath(tmpDirPath, dictName, dictPathBufSize, dictPath); + + // Write header file. + if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, + Ver4DictConstants::HEADER_FILE_EXTENSION, headerBuffer)) { + AKLOGE("Dictionary header file %s%s cannot be written.", tmpDirPath, + Ver4DictConstants::HEADER_FILE_EXTENSION); + return false; + } + // Write trie file. + if (!DictFileWritingUtils::flushBufferToFileWithSuffix(dictPath, + Ver4DictConstants::TRIE_FILE_EXTENSION, &mExpandableTrieBuffer)) { + AKLOGE("Dictionary trie file %s%s cannot be written.", tmpDirPath, + Ver4DictConstants::TRIE_FILE_EXTENSION); + return false; + } + // Write dictionary contents. + if (!mTerminalPositionLookupTable.flushToFile(dictPath)) { + AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath); + return false; + } + if (!mProbabilityDictContent.flushToFile(dictPath)) { + AKLOGE("Probability dict content cannot be written. %s", tmpDirPath); + return false; + } + if (!mBigramDictContent.flushToFile(dictPath)) { + AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath); + return false; + } + if (!mShortcutDictContent.flushToFile(dictPath)) { + AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath); + return false; + } + // Remove existing dictionary. + if (!FileUtils::removeDirAndFiles(dictDirPath)) { + AKLOGE("Existing directory %s cannot be removed.", dictDirPath); + ASSERT(false); + return false; + } + // Rename temporary directory. + if (rename(tmpDirPath, dictDirPath) != 0) { + AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath); + ASSERT(false); + return false; + } + return true; +} + +Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath, + MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion) + : mHeaderBuffer(std::move(headerBuffer)), + mDictBuffer(MmappedBuffer::openBuffer(dictPath, + Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)), + mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion), + mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr, + mHeaderPolicy.getSize(), + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mExpandableTrieBuffer(mDictBuffer ? mDictBuffer->getBuffer() : nullptr, + mDictBuffer ? mDictBuffer->getBufferSize() : 0, + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), + mTerminalPositionLookupTable(dictPath, isUpdatable), + mProbabilityDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable), + mBigramDictContent(dictPath, mHeaderPolicy.hasHistoricalInfoOfWords(), isUpdatable), + mShortcutDictContent(dictPath, isUpdatable), + mIsUpdatable(isUpdatable) {} + +Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize) + : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy), + mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), + mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), + mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), + mBigramDictContent(headerPolicy->hasHistoricalInfoOfWords()), mShortcutDictContent(), + mIsUpdatable(true) {} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h new file mode 100644 index 000000000..716ed931b --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H +#define LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H + +#include + +#include "defines.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/shortcut_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +class Ver4DictBuffers { + public: + typedef std::unique_ptr Ver4DictBuffersPtr; + + static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, + MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion); + + static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers( + const HeaderPolicy *const headerPolicy, const int maxTrieSize) { + return Ver4DictBuffersPtr(new Ver4DictBuffers(headerPolicy, maxTrieSize)); + } + + AK_FORCE_INLINE bool isValid() const { + return mHeaderBuffer && mDictBuffer && mHeaderPolicy.isValid() + && mProbabilityDictContent.isValid() && mTerminalPositionLookupTable.isValid() + && mBigramDictContent.isValid() && mShortcutDictContent.isValid(); + } + + AK_FORCE_INLINE bool isNearSizeLimit() const { + return mExpandableTrieBuffer.isNearSizeLimit() + || mTerminalPositionLookupTable.isNearSizeLimit() + || mProbabilityDictContent.isNearSizeLimit() + || mBigramDictContent.isNearSizeLimit() + || mShortcutDictContent.isNearSizeLimit(); + } + + AK_FORCE_INLINE const HeaderPolicy *getHeaderPolicy() const { + return &mHeaderPolicy; + } + + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableHeaderBuffer() { + return &mExpandableHeaderBuffer; + } + + AK_FORCE_INLINE BufferWithExtendableBuffer *getWritableTrieBuffer() { + return &mExpandableTrieBuffer; + } + + AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const { + return &mExpandableTrieBuffer; + } + + AK_FORCE_INLINE TerminalPositionLookupTable *getMutableTerminalPositionLookupTable() { + return &mTerminalPositionLookupTable; + } + + AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { + return &mTerminalPositionLookupTable; + } + + AK_FORCE_INLINE ProbabilityDictContent *getMutableProbabilityDictContent() { + return &mProbabilityDictContent; + } + + AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { + return &mProbabilityDictContent; + } + + AK_FORCE_INLINE BigramDictContent *getMutableBigramDictContent() { + return &mBigramDictContent; + } + + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { + return &mBigramDictContent; + } + + AK_FORCE_INLINE ShortcutDictContent *getMutableShortcutDictContent() { + return &mShortcutDictContent; + } + + AK_FORCE_INLINE const ShortcutDictContent *getShortcutDictContent() const { + return &mShortcutDictContent; + } + + AK_FORCE_INLINE bool isUpdatable() const { + return mIsUpdatable; + } + + bool flush(const char *const dictDirPath) const { + return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer); + } + + bool flushHeaderAndDictBuffers(const char *const dictDirPath, + const BufferWithExtendableBuffer *const headerBuffer) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); + + Ver4DictBuffers(const char *const dictDirPath, + const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion); + + Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize); + + const MmappedBuffer::MmappedBufferPtr mHeaderBuffer; + const MmappedBuffer::MmappedBufferPtr mDictBuffer; + const HeaderPolicy mHeaderPolicy; + BufferWithExtendableBuffer mExpandableHeaderBuffer; + BufferWithExtendableBuffer mExpandableTrieBuffer; + TerminalPositionLookupTable mTerminalPositionLookupTable; + ProbabilityDictContent mProbabilityDictContent; + BigramDictContent mBigramDictContent; + ShortcutDictContent mShortcutDictContent; + const int mIsUpdatable; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_DICT_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp new file mode 100644 index 000000000..793b44ed4 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" + +namespace latinime { +namespace backward { +namespace v401 { + +// These values MUST match the definitions in FormatSpec.java. +const char *const Ver4DictConstants::TRIE_FILE_EXTENSION = ".trie"; +const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header"; +const char *const Ver4DictConstants::FREQ_FILE_EXTENSION = ".freq"; +// tat = Terminal Address Table +const char *const Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat"; +const char *const Ver4DictConstants::BIGRAM_FILE_EXTENSION = ".bigram_freq"; +const char *const Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup"; +const char *const Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION = ".bigram_index_freq"; +const char *const Ver4DictConstants::SHORTCUT_FILE_EXTENSION = ".shortcut_shortcut"; +const char *const Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION = ".shortcut_lookup"; +const char *const Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION = + ".shortcut_index_shortcut"; + +// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. +const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024; +// Extended region size, which is not GCed region size in dict file + additional buffer size, is +// limited to 1MB to prevent from inefficient traversing. +const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; + +const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; +const int Ver4DictConstants::PROBABILITY_SIZE = 1; +const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; +const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; +const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; +const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; +const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4; +const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1; +const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; + +const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16; +const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; +const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; +const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; + +const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; +// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing +// invalid terminal ID in bigram lists. +const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID = + (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1; +const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; +const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; +const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; +const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1; + +const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; +const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; +const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h new file mode 100644 index 000000000..17afeb156 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H +#define LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H + +#include "defines.h" + +namespace latinime { +namespace backward { +namespace v401 { + +// TODO: Create PtConstants under the pt_common and move some constant values there. +// Note that there are corresponding definitions in FormatSpec.java. +class Ver4DictConstants { + public: + static const char *const TRIE_FILE_EXTENSION; + static const char *const HEADER_FILE_EXTENSION; + static const char *const FREQ_FILE_EXTENSION; + static const char *const TERMINAL_ADDRESS_TABLE_FILE_EXTENSION; + static const char *const BIGRAM_FILE_EXTENSION; + static const char *const BIGRAM_LOOKUP_TABLE_FILE_EXTENSION; + static const char *const BIGRAM_CONTENT_TABLE_FILE_EXTENSION; + static const char *const SHORTCUT_FILE_EXTENSION; + static const char *const SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION; + static const char *const SHORTCUT_CONTENT_TABLE_FILE_EXTENSION; + + static const int MAX_DICTIONARY_SIZE; + static const int MAX_DICT_EXTENDED_REGION_SIZE; + + static const int NOT_A_TERMINAL_ID; + static const int PROBABILITY_SIZE; + static const int FLAGS_IN_PROBABILITY_FILE_SIZE; + static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + static const int NOT_A_TERMINAL_ADDRESS; + static const int TERMINAL_ID_FIELD_SIZE; + static const int TIME_STAMP_FIELD_SIZE; + static const int WORD_LEVEL_FIELD_SIZE; + static const int WORD_COUNT_FIELD_SIZE; + + static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE; + static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; + static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE; + static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE; + + static const int BIGRAM_FLAGS_FIELD_SIZE; + static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + static const int INVALID_BIGRAM_TARGET_TERMINAL_ID; + static const int BIGRAM_PROBABILITY_MASK; + static const int BIGRAM_HAS_NEXT_MASK; + // Used when bigram list has time stamp. + static const int BIGRAM_LARGE_PROBABILITY_FIELD_SIZE; + + static const int SHORTCUT_FLAGS_FIELD_SIZE; + static const int SHORTCUT_PROBABILITY_MASK; + static const int SHORTCUT_HAS_NEXT_MASK; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictConstants); +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_DICT_CONSTANTS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp new file mode 100644 index 000000000..80b51b292 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode( + const int ptNodePos, const int siblingNodePos) const { + if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d", + ptNodePos, mBuffer->getTailPosition()); + ASSERT(false); + return PtNodeParams(); + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int pos = ptNodePos; + const int headPos = ptNodePos; + if (usesAdditionalBuffer) { + pos -= mBuffer->getOriginalBufferSize(); + } + const PatriciaTrieReadingUtils::NodeFlags flags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const int parentPosOffset = + DynamicPtReadingUtils::getParentPtNodePosOffsetAndAdvancePosition( + dictBuf, &pos); + const int parentPos = + DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos); + int codePoints[MAX_WORD_LENGTH]; + const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( + dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); + int terminalIdFieldPos = NOT_A_DICT_POS; + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + int probability = NOT_A_PROBABILITY; + if (PatriciaTrieReadingUtils::isTerminal(flags)) { + terminalIdFieldPos = pos; + if (usesAdditionalBuffer) { + terminalIdFieldPos += mBuffer->getOriginalBufferSize(); + } + terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos); + const ProbabilityEntry probabilityEntry = + mProbabilityDictContent->getProbabilityEntry(terminalId); + if (probabilityEntry.hasHistoricalInfo()) { + probability = ForgettingCurveUtils::decodeProbability( + probabilityEntry.getHistoricalInfo(), mHeaderPolicy); + } else { + probability = probabilityEntry.getProbability(); + } + } + int childrenPosFieldPos = pos; + if (usesAdditionalBuffer) { + childrenPosFieldPos += mBuffer->getOriginalBufferSize(); + } + int childrenPos = DynamicPtReadingUtils::readChildrenPositionAndAdvancePosition( + dictBuf, &pos); + if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) { + childrenPos += mBuffer->getOriginalBufferSize(); + } + if (usesAdditionalBuffer) { + pos += mBuffer->getOriginalBufferSize(); + } + // Sibling position is the tail position of original PtNode. + int newSiblingNodePos = (siblingNodePos == NOT_A_DICT_POS) ? pos : siblingNodePos; + // Read destination node if the read node is a moved node. + if (DynamicPtReadingUtils::isMoved(flags)) { + // The destination position is stored at the same place as the parent position. + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos); + } else { + return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, + terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, + newSiblingNodePos); + } +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h new file mode 100644 index 000000000..0531b0a29 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H +#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class BufferWithExtendableBuffer; +namespace backward { +namespace v401 { +} // namespace v401 +} // namespace backward +class HeaderPolicy; +namespace backward { +namespace v401 { +class ProbabilityDictContent; + +/* + * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved + * node and reads node attributes including probability form probabilityBuffer. + */ +class Ver4PatriciaTrieNodeReader : public PtNodeReader { + public: + Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer, + const ProbabilityDictContent *const probabilityDictContent, + const HeaderPolicy *const headerPolicy) + : mBuffer(buffer), mProbabilityDictContent(probabilityDictContent), + mHeaderPolicy(headerPolicy) {} + + ~Ver4PatriciaTrieNodeReader() {} + + virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const { + return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos, + NOT_A_DICT_POS /* siblingNodePos */); + } + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader); + + const BufferWithExtendableBuffer *const mBuffer; + const ProbabilityDictContent *const mProbabilityDictContent; + const HeaderPolicy *const mHeaderPolicy; + + const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos, + const int siblingNodePos) const; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_READER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp new file mode 100644 index 000000000..8de6bacfc --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.cpp @@ -0,0 +1,429 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" + +#include "suggest/core/dictionary/property/unigram_property.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; + +bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mTrieBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + true /* isDeleted */, false /* willBecomeNonTerminal */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->isTerminal()) { + // The PtNode is a terminal. Delete entry from the terminal position lookup table. + return mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( + toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */); + } else { + return true; + } +} + +bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mTrieBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPtReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */, false /* willBecomeNonTerminal */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent offset field. + if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition( + mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->hasChildren()) { + // Update children's parent position. + mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); + while (!mReadingHelper.isEnd()) { + const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition( + mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), + &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. + return false; + } + mReadingHelper.readNextSiblingNode(childPtNodeParams); + } + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal( + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mTrieBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPtReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + false /* isDeleted */, true /* willBecomeNonTerminal */); + if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( + toBeUpdatedPtNodeParams->getTerminalId(), NOT_A_DICT_POS /* ptNodePos */)) { + AKLOGE("Cannot update terminal position lookup table. terminal id: %d", + toBeUpdatedPtNodeParams->getTerminalId()); + return false; + } + // Update flags. + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + return DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos); +} + +bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const UnigramProperty *const unigramProperty) { + // Update probability and historical information. + // TODO: Update other information in the unigram property. + if (!toBeUpdatedPtNodeParams->isTerminal()) { + return false; + } + const ProbabilityEntry originalProbabilityEntry = + mBuffers->getProbabilityDictContent()->getProbabilityEntry( + toBeUpdatedPtNodeParams->getTerminalId()); + const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, + unigramProperty); + return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); +} + +bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( + const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode) { + if (!toBeUpdatedPtNodeParams->isTerminal()) { + AKLOGE("updatePtNodeProbabilityAndGetNeedsToSaveForGC is called for non-terminal PtNode."); + return false; + } + const ProbabilityEntry originalProbabilityEntry = + mBuffers->getProbabilityDictContent()->getProbabilityEntry( + toBeUpdatedPtNodeParams->getTerminalId()); + if (originalProbabilityEntry.hasHistoricalInfo()) { + const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( + originalProbabilityEntry.getHistoricalInfo(), mHeaderPolicy); + const ProbabilityEntry probabilityEntry = + originalProbabilityEntry.createEntryWithUpdatedHistoricalInfo(&historicalInfo); + if (!mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( + toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry)) { + AKLOGE("Cannot write updated probability entry. terminalId: %d", + toBeUpdatedPtNodeParams->getTerminalId()); + return false; + } + const bool isValid = ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy); + if (!isValid) { + if (!markPtNodeAsWillBecomeNonTerminal(toBeUpdatedPtNodeParams)) { + AKLOGE("Cannot mark PtNode as willBecomeNonTerminal."); + return false; + } + } + *outNeedsToKeepPtNode = isValid; + } else { + // No need to update probability. + *outNeedsToKeepPtNode = true; + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) { + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + return DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, + newChildrenPosition, &childrenPosFieldPos); +} + +bool Ver4PatriciaTrieNodeWriter::updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newTerminalId) { + return mTrieBuffer->writeUint(newTerminalId, Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, + toBeUpdatedPtNodeParams->getTerminalIdFieldPos()); +} + +bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { + return writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, 0 /* outTerminalId */, + ptNodeWritingPos); +} + + +bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, + int *const ptNodeWritingPos) { + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, + ptNodeWritingPos)) { + return false; + } + // Write probability. + ProbabilityEntry newProbabilityEntry; + const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( + &newProbabilityEntry, unigramProperty); + return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, + &probabilityEntryToWrite); +} + +bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( + const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, + const BigramProperty *const bigramProperty, bool *const outAddedNewBigram) { + if (!mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId(), bigramProperty, outAddedNewBigram)) { + AKLOGE("Cannot add new bigram entry. terminalId: %d, targetTerminalId: %d", + sourcePtNodeParams->getTerminalId(), targetPtNodeParam->getTerminalId()); + return false; + } + if (!sourcePtNodeParams->hasBigrams()) { + // Update has bigrams flag. + return updatePtNodeFlags(sourcePtNodeParams->getHeadPos(), + sourcePtNodeParams->isBlacklisted(), sourcePtNodeParams->isNotAWord(), + sourcePtNodeParams->isTerminal(), sourcePtNodeParams->hasShortcutTargets(), + true /* hasBigrams */, + sourcePtNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( + const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { + return mBigramPolicy->removeEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId()); +} + +bool Ver4PatriciaTrieNodeWriter::updateAllBigramEntriesAndDeleteUselessEntries( + const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount) { + return mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries( + sourcePtNodeParams->getTerminalId(), outBigramEntryCount); +} + +bool Ver4PatriciaTrieNodeWriter::updateAllPositionFields( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const DictPositionRelocationMap *const dictPositionRelocationMap, + int *const outBigramEntryCount) { + int parentPos = toBeUpdatedPtNodeParams->getParentPos(); + if (parentPos != NOT_A_DICT_POS) { + PtNodeWriter::PtNodePositionRelocationMap::const_iterator it = + dictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos); + if (it != dictPositionRelocationMap->mPtNodePositionRelocationMap.end()) { + parentPos = it->second; + } + } + int writingPos = toBeUpdatedPtNodeParams->getHeadPos() + + DynamicPtWritingUtils::NODE_FLAG_FIELD_SIZE; + // Write updated parent offset. + if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, + parentPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + + // Updates children position. + int childrenPos = toBeUpdatedPtNodeParams->getChildrenPos(); + if (childrenPos != NOT_A_DICT_POS) { + PtNodeWriter::PtNodeArrayPositionRelocationMap::const_iterator it = + dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos); + if (it != dictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) { + childrenPos = it->second; + } + } + if (!updateChildrenPosition(toBeUpdatedPtNodeParams, childrenPos)) { + return false; + } + + // Counts bigram entries. + if (outBigramEntryCount) { + *outBigramEntryCount = mBigramPolicy->getBigramEntryConut( + toBeUpdatedPtNodeParams->getTerminalId()); + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::addShortcutTarget(const PtNodeParams *const ptNodeParams, + const int *const targetCodePoints, const int targetCodePointCount, + const int shortcutProbability) { + if (!mShortcutPolicy->addNewShortcut(ptNodeParams->getTerminalId(), + targetCodePoints, targetCodePointCount, shortcutProbability)) { + AKLOGE("Cannot add new shortuct entry. terminalId: %d", ptNodeParams->getTerminalId()); + return false; + } + if (!ptNodeParams->hasShortcutTargets()) { + // Update has shortcut targets flag. + return updatePtNodeFlags(ptNodeParams->getHeadPos(), + ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), + ptNodeParams->isTerminal(), true /* hasShortcutTargets */, + ptNodeParams->hasBigrams(), + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::updatePtNodeHasBigramsAndShortcutTargetsFlags( + const PtNodeParams *const ptNodeParams) { + const bool hasBigrams = mBuffers->getBigramDictContent()->getBigramListHeadPos( + ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; + const bool hasShortcutTargets = mBuffers->getShortcutDictContent()->getShortcutListHeadPos( + ptNodeParams->getTerminalId()) != NOT_A_DICT_POS; + return updatePtNodeFlags(ptNodeParams->getHeadPos(), ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), ptNodeParams->isTerminal(), hasShortcutTargets, + hasBigrams, ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); +} + +bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outTerminalId, + int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPtWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. + if (!DynamicPtWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points + if (!DynamicPtWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + if (!ptNodeParams->willBecomeNonTerminal()) { + if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { + terminalId = ptNodeParams->getTerminalId(); + } else if (ptNodeParams->isTerminal()) { + // Write terminal information using a new terminal id. + // Get a new unused terminal id. + terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId(); + } + } + const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; + if (isTerminal) { + // Update the lookup table. + if (!mBuffers->getMutableTerminalPositionLookupTable()->setTerminalPtNodePosition( + terminalId, nodePos)) { + return false; + } + // Write terminal Id. + if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId, + Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) { + return false; + } + if (outTerminalId) { + *outTerminalId = terminalId; + } + } + // Write children position + if (!DynamicPtWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + return updatePtNodeFlags(nodePos, ptNodeParams->isBlacklisted(), ptNodeParams->isNotAWord(), + isTerminal, ptNodeParams->hasShortcutTargets(), ptNodeParams->hasBigrams(), + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */); +} + +const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( + const ProbabilityEntry *const originalProbabilityEntry, + const UnigramProperty *const unigramProperty) const { + // TODO: Consolidate historical info and probability. + if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + const HistoricalInfo historicalInfoForUpdate(unigramProperty->getTimestamp(), + unigramProperty->getLevel(), unigramProperty->getCount()); + const HistoricalInfo updatedHistoricalInfo = + ForgettingCurveUtils::createUpdatedHistoricalInfo( + originalProbabilityEntry->getHistoricalInfo(), + unigramProperty->getProbability(), &historicalInfoForUpdate, mHeaderPolicy); + return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( + &updatedHistoricalInfo); + } else { + return originalProbabilityEntry->createEntryWithUpdatedProbability( + unigramProperty->getProbability()); + } +} + +bool Ver4PatriciaTrieNodeWriter::updatePtNodeFlags(const int ptNodePos, + const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, + const bool hasShortcutTargets, const bool hasBigrams, const bool hasMultipleChars) { + // Create node flags and write them. + PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, isTerminal, + hasShortcutTargets, hasBigrams, hasMultipleChars, + CHILDREN_POSITION_FIELD_SIZE); + if (!DynamicPtWritingUtils::writeFlags(mTrieBuffer, nodeFlags, ptNodePos)) { + AKLOGE("Cannot write PtNode flags. flags: %x, pos: %d", nodeFlags, ptNodePos); + return false; + } + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h new file mode 100644 index 000000000..7f1851d63 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H +#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/probability_entry.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class BufferWithExtendableBuffer; +namespace backward { +namespace v401 { +} // namespace v401 +} // namespace backward +class HeaderPolicy; +namespace backward { +namespace v401 { +class Ver4BigramListPolicy; +class Ver4DictBuffers; +class Ver4PatriciaTrieNodeReader; +class Ver4PtNodeArrayReader; +class Ver4ShortcutListPolicy; + +/* + * This class is used for helping to writes nodes of ver4 patricia trie. + */ +class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { + public: + Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, + Ver4DictBuffers *const buffers, const HeaderPolicy *const headerPolicy, + const PtNodeReader *const ptNodeReader, + const PtNodeArrayReader *const ptNodeArrayReader, + Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) + : mTrieBuffer(trieBuffer), mBuffers(buffers), mHeaderPolicy(headerPolicy), + mReadingHelper(ptNodeReader, ptNodeArrayReader), mBigramPolicy(bigramPolicy), + mShortcutPolicy(shortcutPolicy) {} + + virtual ~Ver4PatriciaTrieNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos); + + virtual bool markPtNodeAsWillBecomeNonTerminal( + const PtNodeParams *const toBeUpdatedPtNodeParams); + + virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, + const UnigramProperty *const unigramProperty); + + virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( + const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition); + + bool updateTerminalId(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newTerminalId); + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos); + + virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const BigramProperty *const bigramProperty, + bool *const outAddedNewBigram); + + virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam); + + virtual bool updateAllBigramEntriesAndDeleteUselessEntries( + const PtNodeParams *const sourcePtNodeParams, int *const outBigramEntryCount); + + virtual bool updateAllPositionFields(const PtNodeParams *const toBeUpdatedPtNodeParams, + const DictPositionRelocationMap *const dictPositionRelocationMap, + int *const outBigramEntryCount); + + virtual bool addShortcutTarget(const PtNodeParams *const ptNodeParams, + const int *const targetCodePoints, const int targetCodePointCount, + const int shortcutProbability); + + bool updatePtNodeHasBigramsAndShortcutTargetsFlags(const PtNodeParams *const ptNodeParams); + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); + + bool writePtNodeAndGetTerminalIdAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const outTerminalId, + int *const ptNodeWritingPos); + + // Create updated probability entry using given unigram property. In addition to the + // probability, this method updates historical information if needed. + // TODO: Update flags belonging to the unigram property. + const ProbabilityEntry createUpdatedEntryFrom( + const ProbabilityEntry *const originalProbabilityEntry, + const UnigramProperty *const unigramProperty) const; + + bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, + const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, + const bool hasMultipleChars); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mTrieBuffer; + Ver4DictBuffers *const mBuffers; + const HeaderPolicy *const mHeaderPolicy; + DynamicPtReadingHelper mReadingHelper; + Ver4BigramListPolicy *const mBigramPolicy; + Ver4ShortcutListPolicy *const mShortcutPolicy; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp new file mode 100644 index 000000000..dde1af299 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.cpp @@ -0,0 +1,475 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! + * Do not edit this file other than updating policy's interface. + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h" + +#include + +#include "suggest/core/dicnode/dic_node.h" +#include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/property/bigram_property.h" +#include "suggest/core/dictionary/property/unigram_property.h" +#include "suggest/core/dictionary/property/word_property.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" +#include "suggest/policyimpl/dictionary/utils/probability_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +// Note that there are corresponding definitions in Java side in BinaryDictionaryTests and +// BinaryDictionaryDecayingTests. +const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; +const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; +const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; +const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; +const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; +const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = + Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + +void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const { + if (!dicNode->hasChildren()) { + return; + } + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos()); + while (!readingHelper.isEnd()) { + const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams(); + if (!ptNodeParams.isValid()) { + break; + } + bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted(); + if (isTerminal && mHeaderPolicy->isDecayingDict()) { + // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose + // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a + // valid terminal DicNode. + isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; + } + childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), + ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, + ptNodeParams.hasChildren(), + ptNodeParams.isBlacklisted() + || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, + ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); + readingHelper.readNextSiblingNode(ptNodeParams); + } + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); + } +} + +int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( + const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const { + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodePos(ptNodePos); + const int codePointCount = readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount( + maxCodePointCount, outCodePoints, outUnigramProbability); + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount()."); + } + return codePointCount; +} + +int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const { + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + const int ptNodePos = + readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); + if (readingHelper.isError()) { + mIsCorrupted = true; + AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); + } + return ptNodePos; +} + +int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, + const int bigramProbability) const { + if (mHeaderPolicy->isDecayingDict()) { + // Both probabilities are encoded. Decode them and get probability. + return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); + } else { + if (unigramProbability == NOT_A_PROBABILITY) { + return NOT_A_PROBABILITY; + } else if (bigramProbability == NOT_A_PROBABILITY) { + return ProbabilityUtils::backoff(unigramProbability); + } else { + // bigramProbability is a bigram probability delta. + return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, + bigramProbability); + } + } +} + +int Ver4PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_PROBABILITY; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { + return NOT_A_PROBABILITY; + } + return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); +} + +int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return mBuffers->getShortcutDictContent()->getShortcutListHeadPos( + ptNodeParams.getTerminalId()); +} + +int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { + if (ptNodePos == NOT_A_DICT_POS) { + return NOT_A_DICT_POS; + } + const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos)); + if (ptNodeParams.isDeleted()) { + return NOT_A_DICT_POS; + } + return mBuffers->getBigramDictContent()->getBigramListHeadPos( + ptNodeParams.getTerminalId()); +} + +bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, + const UnigramProperty *const unigramProperty) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + if (length > MAX_WORD_LENGTH) { + AKLOGE("The word is too long to insert to the dictionary, length: %d", length); + return false; + } + for (const auto &shortcut : unigramProperty->getShortcuts()) { + if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d", + shortcut.getTargetCodePoints()->size()); + return false; + } + } + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + bool addedNewUnigram = false; + if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, + unigramProperty, &addedNewUnigram)) { + if (addedNewUnigram) { + mUnigramCount++; + } + if (unigramProperty->getShortcuts().size() > 0) { + // Add shortcut target. + const int wordPos = getTerminalPtNodePositionOfWord(word, length, + false /* forceLowerCaseSearch */); + if (wordPos == NOT_A_DICT_POS) { + AKLOGE("Cannot find terminal PtNode position to add shortcut target."); + return false; + } + for (const auto &shortcut : unigramProperty->getShortcuts()) { + if (!mUpdatingHelper.addShortcutTarget(wordPos, + shortcut.getTargetCodePoints()->data(), + shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { + AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, " + "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), + shortcut.getProbability()); + return false; + } + } + } + return true; + } else { + return false; + } +} + +bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + if (length0 > MAX_WORD_LENGTH + || bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) { + AKLOGE("Either src word or target word is too long to insert the bigram to the dictionary. " + "length0: %d, length1: %d", length0, bigramProperty->getTargetCodePoints()->size()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord( + bigramProperty->getTargetCodePoints()->data(), + bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + bool addedNewBigram = false; + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, bigramProperty, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } +} + +bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, + const int *const word1, const int length1) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer->getTailPosition()); + return false; + } + if (length0 > MAX_WORD_LENGTH || length1 > MAX_WORD_LENGTH) { + AKLOGE("Either src word or target word is too long to remove the bigram to from the " + "dictionary. length0: %d, length1: %d", length0, length1); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + if (mUpdatingHelper.removeBigramWords(word0Pos, word1Pos)) { + mBigramCount--; + return true; + } else { + return false; + } +} + +void Ver4PatriciaTriePolicy::flush(const char *const filePath) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); + return; + } + if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { + AKLOGE("Cannot flush the dictionary to file."); + mIsCorrupted = true; + } +} + +void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); + return; + } + if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) { + AKLOGE("Cannot flush the dictionary to file with GC."); + mIsCorrupted = true; + } +} + +bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { + if (!mBuffers->isUpdatable()) { + AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); + return false; + } + if (mBuffers->isNearSizeLimit()) { + // Additional buffer size is near the limit. + return true; + } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize() + > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) { + // Total extended region size of the trie exceeds the limit. + return true; + } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS + && mDictBuffer->getUsedAdditionalBufferSize() > 0) { + // Needs to reduce dictionary size. + return true; + } else if (mHeaderPolicy->isDecayingDict()) { + return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, + mHeaderPolicy); + } + return false; +} + +void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength, + char *const outResult, const int maxResultLength) { + const int compareLength = queryLength + 1 /* terminator */; + if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mUnigramCount); + } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { + snprintf(outResult, maxResultLength, "%d", mBigramCount); + } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy->isDecayingDict() ? + ForgettingCurveUtils::getUnigramCountHardLimit( + mHeaderPolicy->getMaxUnigramCount()) : + static_cast(Ver4DictConstants::MAX_DICTIONARY_SIZE)); + } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { + snprintf(outResult, maxResultLength, "%d", + mHeaderPolicy->isDecayingDict() ? + ForgettingCurveUtils::getBigramCountHardLimit( + mHeaderPolicy->getMaxBigramCount()) : + static_cast(Ver4DictConstants::MAX_DICTIONARY_SIZE)); + } +} + +const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints, + const int codePointCount) const { + const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, + false /* forceLowerCaseSearch */); + if (ptNodePos == NOT_A_DICT_POS) { + AKLOGE("getWordProperty is called for invalid word."); + return WordProperty(); + } + const PtNodeParams ptNodeParams = mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + std::vector codePointVector(ptNodeParams.getCodePoints(), + ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); + const ProbabilityEntry probabilityEntry = + mBuffers->getProbabilityDictContent()->getProbabilityEntry( + ptNodeParams.getTerminalId()); + const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); + // Fetch bigram information. + std::vector bigrams; + const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); + if (bigramListPos != NOT_A_DICT_POS) { + int bigramWord1CodePoints[MAX_WORD_LENGTH]; + const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent(); + const TerminalPositionLookupTable *const terminalPositionLookupTable = + mBuffers->getTerminalPositionLookupTable(); + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const BigramEntry bigramEntry = + bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + const int word1TerminalId = bigramEntry.getTargetTerminalId(); + const int word1TerminalPtNodePos = + terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId); + if (word1TerminalPtNodePos == NOT_A_DICT_POS) { + continue; + } + // Word (unigram) probability + int word1Probability = NOT_A_PROBABILITY; + const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints, + &word1Probability); + const std::vector word1(bigramWord1CodePoints, + bigramWord1CodePoints + codePointCount); + const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo(); + const int probability = bigramEntry.hasHistoricalInfo() ? + ForgettingCurveUtils::decodeProbability( + bigramEntry.getHistoricalInfo(), mHeaderPolicy) : + getProbability(word1Probability, bigramEntry.getProbability()); + bigrams.emplace_back(&word1, probability, + historicalInfo->getTimeStamp(), historicalInfo->getLevel(), + historicalInfo->getCount()); + } + } + // Fetch shortcut information. + std::vector shortcuts; + int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); + if (shortcutPos != NOT_A_DICT_POS) { + int shortcutTarget[MAX_WORD_LENGTH]; + const ShortcutDictContent *const shortcutDictContent = + mBuffers->getShortcutDictContent(); + bool hasNext = true; + while (hasNext) { + int shortcutTargetLength = 0; + int shortcutProbability = NOT_A_PROBABILITY; + shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget, + &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos); + const std::vector target(shortcutTarget, shortcutTarget + shortcutTargetLength); + shortcuts.emplace_back(&target, shortcutProbability); + } + } + const UnigramProperty unigramProperty(ptNodeParams.isNotAWord(), + ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), + historicalInfo->getTimeStamp(), historicalInfo->getLevel(), + historicalInfo->getCount(), &shortcuts); + return WordProperty(&codePointVector, &unigramProperty, &bigrams); +} + +int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints) { + // TODO: Return code point count like other methods. + // Null termination. + outCodePoints[0] = 0; + if (token == 0) { + mTerminalPtNodePositionsForIteratingWords.clear(); + DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( + &mTerminalPtNodePositionsForIteratingWords); + DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); + } + const int terminalPtNodePositionsVectorSize = + static_cast(mTerminalPtNodePositionsForIteratingWords.size()); + if (token < 0 || token >= terminalPtNodePositionsVectorSize) { + AKLOGE("Given token %d is invalid.", token); + return 0; + } + const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; + int unigramProbability = NOT_A_PROBABILITY; + const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( + terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability); + if (codePointCount < MAX_WORD_LENGTH) { + // Null termination. outCodePoints have to be null terminated or contain MAX_WORD_LENGTH + // code points. + outCodePoints[codePointCount] = 0; + } + const int nextToken = token + 1; + if (nextToken >= terminalPtNodePositionsVectorSize) { + // All words have been iterated. + mTerminalPtNodePositionsForIteratingWords.clear(); + return 0; + } + return nextToken; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h new file mode 100644 index 000000000..2f8ad539c --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_policy.h @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! + * Do not edit this file other than updating policy's interface. + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H +#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H + +#include + +#include "defines.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class DicNode; +namespace backward { +namespace v401 { +} // namespace v401 +} // namespace backward +class DicNodeVector; +namespace backward { +namespace v401 { + +class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { + public: + Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers) + : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()), + mDictBuffer(mBuffers->getWritableTrieBuffer()), + mBigramPolicy(mBuffers->getMutableBigramDictContent(), + mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy), + mShortcutPolicy(mBuffers->getMutableShortcutDictContent(), + mBuffers->getTerminalPositionLookupTable()), + mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy), + mPtNodeArrayReader(mDictBuffer), + mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader, + &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), + mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), + mWritingHelper(mBuffers.get()), + mUnigramCount(mHeaderPolicy->getUnigramCount()), + mBigramCount(mHeaderPolicy->getBigramCount()), + mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; + + AK_FORCE_INLINE int getRootPosition() const { + return 0; + } + + void createAndGetAllChildDicNodes(const DicNode *const dicNode, + DicNodeVector *const childDicNodes) const; + + int getCodePointsAndProbabilityAndReturnCodePointCount( + const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints, + int *const outUnigramProbability) const; + + int getTerminalPtNodePositionOfWord(const int *const inWord, + const int length, const bool forceLowerCaseSearch) const; + + int getProbability(const int unigramProbability, const int bigramProbability) const; + + int getUnigramProbabilityOfPtNode(const int ptNodePos) const; + + int getShortcutPositionOfPtNode(const int ptNodePos) const; + + int getBigramsPositionOfPtNode(const int ptNodePos) const; + + const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { + return mHeaderPolicy; + } + + const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const { + return &mBigramPolicy; + } + + const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { + return &mShortcutPolicy; + } + + bool addUnigramWord(const int *const word, const int length, + const UnigramProperty *const unigramProperty); + + bool addBigramWords(const int *const word0, const int length0, + const BigramProperty *const bigramProperty); + + bool removeBigramWords(const int *const word0, const int length0, const int *const word1, + const int length1); + + void flush(const char *const filePath); + + void flushWithGC(const char *const filePath); + + bool needsToRunGC(const bool mindsBlockByGC) const; + + void getProperty(const char *const query, const int queryLength, char *const outResult, + const int maxResultLength); + + const WordProperty getWordProperty(const int *const codePoints, + const int codePointCount) const; + + int getNextWordAndNextToken(const int token, int *const outCodePoints); + + bool isCorrupted() const { + return mIsCorrupted; + } + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); + + static const char *const UNIGRAM_COUNT_QUERY; + static const char *const BIGRAM_COUNT_QUERY; + static const char *const MAX_UNIGRAM_COUNT_QUERY; + static const char *const MAX_BIGRAM_COUNT_QUERY; + // When the dictionary size is near the maximum size, we have to refuse dynamic operations to + // prevent the dictionary from overflowing. + static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + + const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; + const HeaderPolicy *const mHeaderPolicy; + BufferWithExtendableBuffer *const mDictBuffer; + Ver4BigramListPolicy mBigramPolicy; + Ver4ShortcutListPolicy mShortcutPolicy; + Ver4PatriciaTrieNodeReader mNodeReader; + Ver4PtNodeArrayReader mPtNodeArrayReader; + Ver4PatriciaTrieNodeWriter mNodeWriter; + DynamicPtUpdatingHelper mUpdatingHelper; + Ver4PatriciaTrieWritingHelper mWritingHelper; + int mUnigramCount; + int mBigramCount; + std::vector mTerminalPtNodePositionsForIteratingWords; + mutable bool mIsCorrupted; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif // LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_POLICY_H diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp new file mode 100644 index 000000000..6cc36fbef --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h" + +#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +/* static */ int Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition( + const uint8_t *const buffer, int *pos) { + return ByteArrayUtils::readUint32AndAdvancePosition(buffer, pos); +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h new file mode 100644 index 000000000..7417c261e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_reading_utils.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_reading_utils.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H +#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H + +#include + +#include "defines.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class BufferWithExtendableBuffer; +namespace backward { +namespace v401 { + +class Ver4PatriciaTrieReadingUtils { + public: + static int getTerminalIdAndAdvancePosition(const uint8_t *const buffer, + int *const pos); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieReadingUtils); +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp new file mode 100644 index 000000000..10f27beb7 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.cpp @@ -0,0 +1,301 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h" + +#include +#include + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/bigram/ver4_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" +#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" + +namespace latinime { +namespace backward { +namespace v401 { + +bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, + const int unigramCount, const int bigramCount) const { + const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); + if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, + unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) { + AKLOGE("Cannot write header structure to buffer. " + "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " + "extendedRegionSize: %d", false, unigramCount, bigramCount, + extendedRegionSize); + return false; + } + return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); +} + +bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, + const char *const dictDirPath) { + const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); + Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( + Ver4DictBuffers::createVer4DictBuffers(headerPolicy, + Ver4DictConstants::MAX_DICTIONARY_SIZE)); + int unigramCount = 0; + int bigramCount = 0; + if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) { + return false; + } + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); + if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, + unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) { + return false; + } + return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); +} + +bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, + const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, + int *const outUnigramCount, int *const outBigramCount) { + Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), + mBuffers->getProbabilityDictContent(), headerPolicy); + Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); + Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), + mBuffers->getTerminalPositionLookupTable(), headerPolicy); + Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), + mBuffers->getTerminalPositionLookupTable()); + Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), + mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, + &shortcutPolicy); + + DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPtGcEventListeners + ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( + &ptNodeWriter); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { + return false; + } + const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted + .getValidUnigramCount(); + const int maxUnigramCount = headerPolicy->getMaxUnigramCount(); + if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) { + if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) { + AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount, + maxUnigramCount); + return false; + } + } + + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability + traversePolicyToUpdateBigramProbability(&ptNodeWriter); + if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateBigramProbability)) { + return false; + } + const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount(); + const int maxBigramCount = headerPolicy->getMaxBigramCount(); + if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) { + if (!truncateBigrams(maxBigramCount)) { + AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount); + return false; + } + } + + // Mapping from positions in mBuffer to positions in bufferToWrite. + PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; + readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), + buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, + &shortcutPolicy); + DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer + traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, + buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); + if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { + return false; + } + + // Create policy instances for the GCed dictionary. + Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), + buffersToWrite->getProbabilityDictContent(), headerPolicy); + Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); + Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), + buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); + Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), + buffersToWrite->getTerminalPositionLookupTable()); + Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), + buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, + &newShortcutPolicy); + // Re-assign terminal IDs for valid terminal PtNodes. + TerminalPositionLookupTable::TerminalIdMap terminalIdMap; + if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( + &terminalIdMap)) { + return false; + } + // Run GC for probability dict content. + if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, + mBuffers->getProbabilityDictContent())) { + return false; + } + // Run GC for bigram dict content. + if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap, + mBuffers->getBigramDictContent(), outBigramCount)) { + return false; + } + // Run GC for shortcut dict content. + if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, + mBuffers->getShortcutDictContent())) { + return false; + } + DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields + traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); + if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( + &traversePolicyToUpdateAllPositionFields)) { + return false; + } + newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); + TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds + traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); + if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( + &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { + return false; + } + *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); + return true; +} + +bool Ver4PatriciaTrieWritingHelper::truncateUnigrams( + const Ver4PatriciaTrieNodeReader *const ptNodeReader, + Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) { + const TerminalPositionLookupTable *const terminalPosLookupTable = + mBuffers->getTerminalPositionLookupTable(); + const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); + std::priority_queue, DictProbabilityComparator> + priorityQueue; + for (int i = 0; i < nextTerminalId; ++i) { + const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i); + if (terminalPos == NOT_A_DICT_POS) { + continue; + } + const ProbabilityEntry probabilityEntry = + mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); + const int probability = probabilityEntry.hasHistoricalInfo() ? + ForgettingCurveUtils::decodeProbability( + probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : + probabilityEntry.getProbability(); + priorityQueue.push(DictProbability(terminalPos, probability, + probabilityEntry.getHistoricalInfo()->getTimeStamp())); + } + + // Delete unigrams. + while (static_cast(priorityQueue.size()) > maxUnigramCount) { + const int ptNodePos = priorityQueue.top().getDictPos(); + const PtNodeParams ptNodeParams = + ptNodeReader->fetchNodeInfoInBufferFromPtNodePos(ptNodePos); + if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) { + AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos); + return false; + } + priorityQueue.pop(); + } + return true; +} + +bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) { + const TerminalPositionLookupTable *const terminalPosLookupTable = + mBuffers->getTerminalPositionLookupTable(); + const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); + std::priority_queue, DictProbabilityComparator> + priorityQueue; + BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent(); + for (int i = 0; i < nextTerminalId; ++i) { + const int bigramListPos = bigramDictContent->getBigramListHeadPos(i); + if (bigramListPos == NOT_A_DICT_POS) { + continue; + } + bool hasNext = true; + int readingPos = bigramListPos; + while (hasNext) { + const int entryPos = readingPos; + const BigramEntry bigramEntry = + bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); + hasNext = bigramEntry.hasNext(); + if (!bigramEntry.isValid()) { + continue; + } + const int probability = bigramEntry.hasHistoricalInfo() ? + ForgettingCurveUtils::decodeProbability( + bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : + bigramEntry.getProbability(); + priorityQueue.push(DictProbability(entryPos, probability, + bigramEntry.getHistoricalInfo()->getTimeStamp())); + } + } + + // Delete bigrams. + while (static_cast(priorityQueue.size()) > maxBigramCount) { + const int entryPos = priorityQueue.top().getDictPos(); + const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos); + const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry(); + if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) { + AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos); + return false; + } + priorityQueue.pop(); + } + return true; +} + +bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds + ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { + if (!ptNodeParams->isTerminal()) { + return true; + } + TerminalPositionLookupTable::TerminalIdMap::const_iterator it = + mTerminalIdMap->find(ptNodeParams->getTerminalId()); + if (it == mTerminalIdMap->end()) { + AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd", + ptNodeParams->getTerminalId(), mTerminalIdMap->size()); + return false; + } + if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) { + AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second); + } + return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams); +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h new file mode 100644 index 000000000..be44aaa33 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_patricia_trie_writing_helper.h @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H +#define LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_gc_event_listeners.h" +#include "suggest/policyimpl/dictionary/structure/backward/v401/content/terminal_position_lookup_table.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class HeaderPolicy; +namespace backward { +namespace v401 { +class Ver4DictBuffers; +class Ver4PatriciaTrieNodeReader; +class Ver4PatriciaTrieNodeWriter; + +class Ver4PatriciaTrieWritingHelper { + public: + Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) + : mBuffers(buffers) {} + + bool writeToDictFile(const char *const dictDirPath, const int unigramCount, + const int bigramCount) const; + + // This method cannot be const because the original dictionary buffer will be updated to detect + // useless PtNodes during GC. + bool writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const dictDirPath); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper); + + class TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds + : public DynamicPtReadingHelper::TraversingEventListener { + public: + TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds( + Ver4PatriciaTrieNodeWriter *const ptNodeWriter, + const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap) + : mPtNodeWriter(ptNodeWriter), mTerminalIdMap(terminalIdMap) {} + + bool onAscend() { return true; } + + bool onDescend(const int ptNodeArrayPos) { return true; } + + bool onReadingPtNodeArrayTail() { return true; } + + bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds); + + Ver4PatriciaTrieNodeWriter *const mPtNodeWriter; + const TerminalPositionLookupTable::TerminalIdMap *const mTerminalIdMap; + }; + + // For truncateUnigrams() and truncateBigrams(). + class DictProbability { + public: + DictProbability(const int dictPos, const int probability, const int timestamp) + : mDictPos(dictPos), mProbability(probability), mTimestamp(timestamp) {} + + int getDictPos() const { + return mDictPos; + } + + int getProbability() const { + return mProbability; + } + + int getTimestamp() const { + return mTimestamp; + } + + private: + DISALLOW_DEFAULT_CONSTRUCTOR(DictProbability); + + int mDictPos; + int mProbability; + int mTimestamp; + }; + + // For truncateUnigrams() and truncateBigrams(). + class DictProbabilityComparator { + public: + bool operator()(const DictProbability &left, const DictProbability &right) { + if (left.getProbability() != right.getProbability()) { + return left.getProbability() > right.getProbability(); + } + if (left.getTimestamp() != right.getTimestamp()) { + return left.getTimestamp() < right.getTimestamp(); + } + return left.getDictPos() > right.getDictPos(); + } + + private: + DISALLOW_ASSIGNMENT_OPERATOR(DictProbabilityComparator); + }; + + bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, + Ver4DictBuffers *const buffersToWrite, int *const outUnigramCount, + int *const outBigramCount); + + bool truncateUnigrams(const Ver4PatriciaTrieNodeReader *const ptNodeReader, + Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount); + + bool truncateBigrams(const int maxBigramCount); + + Ver4DictBuffers *const mBuffers; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime + +#endif /* LATINIME_BACKWARD_V401_VER4_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp new file mode 100644 index 000000000..33e4e55e2 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.cpp + */ + +#include "suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h" + +#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { +namespace backward { +namespace v401 { + +bool Ver4PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const { + if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of a bug or a broken dictionary. + AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d", + ptNodeArrayPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodeArrayPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = ptNodeArrayPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( + dictBuf, &readingPos); + if (usesAdditionalBuffer) { + readingPos += mBuffer->getOriginalBufferSize(); + } + if (ptNodeCountInArray < 0) { + AKLOGE("Invalid PtNode count in an array: %d.", ptNodeCountInArray); + return false; + } + *outPtNodeCount = ptNodeCountInArray; + *outFirstPtNodePos = readingPos; + return true; +} + +bool Ver4PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const { + if (forwordLinkPos < 0 || forwordLinkPos >= mBuffer->getTailPosition()) { + // Reading invalid position because of bug or broken dictionary. + AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d", + forwordLinkPos, mBuffer->getTailPosition()); + ASSERT(false); + return false; + } + const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(forwordLinkPos); + const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); + int readingPos = forwordLinkPos; + if (usesAdditionalBuffer) { + readingPos -= mBuffer->getOriginalBufferSize(); + } + const int nextPtNodeArrayOffset = + DynamicPtReadingUtils::getForwardLinkPosition(dictBuf, readingPos); + if (DynamicPtReadingUtils::isValidForwardLinkPosition(nextPtNodeArrayOffset)) { + *outNextPtNodeArrayPos = forwordLinkPos + nextPtNodeArrayOffset; + } else { + *outNextPtNodeArrayPos = NOT_A_DICT_POS; + } + return true; +} + +} // namespace v401 +} // namespace backward +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h new file mode 100644 index 000000000..3a7eefa44 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v401/ver4_pt_node_array_reader.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * !!!!! DO NOT EDIT THIS FILE !!!!! + * + * This file was generated from + * suggest/policyimpl/dictionary/structure/v4/ver4_pt_node_array_reader.h + */ + +#ifndef LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H +#define LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h" + +namespace latinime { +namespace backward { +namespace v401 { + +} // namespace v401 +} // namespace backward +class BufferWithExtendableBuffer; +namespace backward { +namespace v401 { + +class Ver4PtNodeArrayReader : public PtNodeArrayReader { + public: + Ver4PtNodeArrayReader(const BufferWithExtendableBuffer *const buffer) : mBuffer(buffer) {}; + + virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos, + int *const outPtNodeCount, int *const outFirstPtNodePos) const; + virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos, + int *const outNextPtNodeArrayPos) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PtNodeArrayReader); + + const BufferWithExtendableBuffer *const mBuffer; +}; +} // namespace v401 +} // namespace backward +} // namespace latinime +#endif /* LATINIME_BACKWARD_V401_VER4_PT_NODE_ARRAY_READER_H */