From 99808b7b0d5e764fcaa097bb45603c60232e9164 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 12 Nov 2013 21:10:01 +0900 Subject: [PATCH] Implement ver4 dictionary bigram writing methods. Bug: 11073222 Change-Id: Ibb0e697fe14dac1edb1f3074fb59e2cdc91039d4 --- native/jni/Android.mk | 7 +- .../bigram/ver4_bigram_list_policy.cpp | 115 ++++++++++++++++++ .../bigram/ver4_bigram_list_policy.h | 33 ++--- .../dynamic_patricia_trie_reading_helper.cpp | 3 + .../v4/content/bigram_dict_content.cpp | 69 +++++++++++ .../v4/content/bigram_dict_content.h | 33 ++--- .../v4/content/sparse_table_dict_content.h | 8 ++ .../structure/v4/ver4_dict_buffers.h | 4 + .../structure/v4/ver4_dict_constants.cpp | 2 + .../structure/v4/ver4_dict_constants.h | 2 + .../v4/ver4_patricia_trie_node_writer.cpp | 8 +- .../v4/ver4_patricia_trie_policy.cpp | 36 +++++- .../structure/v4/ver4_patricia_trie_policy.h | 2 +- .../utils/buffer_with_extendable_buffer.cpp | 5 + .../utils/buffer_with_extendable_buffer.h | 2 + .../dictionary/utils/sparse_table.cpp | 59 +++++++-- .../dictionary/utils/sparse_table.h | 9 ++ .../latin/Ver4BinaryDictionaryTests.java | 40 ++++++ 18 files changed, 381 insertions(+), 56 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp diff --git a/native/jni/Android.mk b/native/jni/Android.mk index a5757fd58..b61a66ce6 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -68,12 +68,14 @@ LATIN_IME_CORE_SRC_FILES := \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ $(addprefix suggest/policyimpl/dictionary/, \ - bigram/bigram_list_read_write_utils.cpp \ - bigram/dynamic_bigram_list_policy.cpp \ header/header_policy.cpp \ header/header_read_write_utils.cpp \ shortcut/shortcut_list_reading_utils.cpp \ structure/dictionary_structure_with_buffer_policy_factory.cpp) \ + $(addprefix suggest/policyimpl/dictionary/bigram/, \ + bigram_list_read_write_utils.cpp \ + dynamic_bigram_list_policy.cpp \ + ver4_bigram_list_policy.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ patricia_trie_reading_utils.cpp) \ @@ -88,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_helper.cpp \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ + content/bigram_dict_content.cpp \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_writer.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp new file mode 100644 index 000000000..028078111 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" + +#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" +#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" + +namespace latinime { + +void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, + bool *const outHasNext, int *const bigramEntryPos) const { + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(outProbability, outHasNext, + &targetTerminalId, bigramEntryPos); + if (outBigramPos) { + // Lookup target PtNode position. + *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); + } +} + +bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, + const int newProbability, bool *const outAddedNewEntry) { + if (outAddedNewEntry) { + *outAddedNewEntry = false; + } + const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (bigramListPos == NOT_A_DICT_POS) { + // Updating PtNode doesn't have a bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write an entry. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + false /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + return true; + } + + const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos); + if (entryPosToUpdate != NOT_A_DICT_POS) { + // Overwrite existing entry. + int readingPos = entryPosToUpdate; + bool hasNext = false; + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(&probability, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID && outAddedNewEntry) { + // Reuse invalid entry. + *outAddedNewEntry = true; + } + int writingPos = entryPosToUpdate; + return mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, hasNext, + newTargetTerminalId, &writingPos); + } + + // Add new entry to the bigram list. + // Create new bigram list. + if (!mBigramDictContent->createNewBigramList(terminalId)) { + return false; + } + // Write new entry at a head position of the bigram list. + int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); + if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(newProbability, + true /* hasNext */, newTargetTerminalId, &writingPos)) { + return false; + } + if (outAddedNewEntry) { + *outAddedNewEntry = true; + } + // Append existing entries by copying. + return mBigramDictContent->copyBigramList(bigramListPos, writingPos); +} + +int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, + const int bigramListPos) const { + bool hasNext = true; + int invalidEntryPos = NOT_A_DICT_POS; + int readingPos = bigramListPos; + while(hasNext) { + const int entryPos = readingPos; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + mBigramDictContent->getBigramEntryAndAdvancePosition(0 /* probability */, &hasNext, + &targetTerminalId, &readingPos); + if (targetTerminalId == targetTerminalIdToFind) { + // Entry with same target is found. + return entryPos; + } else if (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) { + // Invalid entry that can be reused is found. + invalidEntryPos = entryPos; + } + } + return invalidEntryPos; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h index 875a0ff9b..e599a2c8d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h @@ -19,46 +19,35 @@ #include "defines.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" -#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" -#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" namespace latinime { +class BigramDictContent; +class TerminalPositionLookupTable; + class Ver4BigramListPolicy : public DictionaryBigramsStructurePolicy { public: - Ver4BigramListPolicy(const BigramDictContent *const bigramDictContent, + Ver4BigramListPolicy(BigramDictContent *const bigramDictContent, const TerminalPositionLookupTable *const terminalPositionLookupTable) : mBigramDictContent(bigramDictContent), mTerminalPositionLookupTable(terminalPositionLookupTable) {} void getNextBigram(int *const outBigramPos, int *const outProbability, - bool *const outHasNext, int *const bigramEntryPos) const { - int bigramFlags = 0; - int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; - mBigramDictContent->getBigramEntryAndAdvancePosition(&bigramFlags, &targetTerminalId, - bigramEntryPos); - if (outProbability) { - *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); - } - if (outHasNext) { - *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); - } - if (outBigramPos) { - // Lookup target PtNode position. - *outBigramPos = - mTerminalPositionLookupTable->getTerminalPtNodePosition(targetTerminalId); - } - } + bool *const outHasNext, int *const bigramEntryPos) const; void skipAllBigrams(int *const pos) const { // Do nothing because we don't need to skip bigram lists in ver4 dictionaries. } + bool addNewEntry(const int terminalId, const int newTargetTerminalId, const int newProbability, + bool *const outAddedNewEntry); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4BigramListPolicy); - const BigramDictContent *const mBigramDictContent; + int getEntryPosToUpdate(const int targetTerminalIdToFind, const int bigramListPos) const; + + BigramDictContent *const mBigramDictContent; const TerminalPositionLookupTable *const mTerminalPositionLookupTable; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp index c3fe03d37..b3fdbeb78 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.cpp @@ -238,6 +238,9 @@ int DynamicPatriciaTrieReadingHelper::getTerminalPtNodePositionOfWord(const int } // All characters are matched. if (length == getTotalCodePointCount(ptNodeParams)) { + if (!ptNodeParams.isTerminal()) { + return NOT_A_DICT_POS; + } // Terminal position is found. return ptNodeParams.getHeadPos(); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp new file mode 100644 index 000000000..f02f14582 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" + +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +void BigramDictContent::getBigramEntryAndAdvancePosition(int *const outProbability, + bool *const outHasNext, int *const outTargetTerminalId, int *const bigramEntryPos) const { + const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); + const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); + if (outProbability) { + *outProbability = bigramFlags & Ver4DictConstants::BIGRAM_PROBABILITY_MASK; + } + if (outHasNext) { + *outHasNext = (bigramFlags & Ver4DictConstants::BIGRAM_HAS_NEXT_MASK) != 0; + } + if (outTargetTerminalId) { + *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); + } +} + +bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos) { + BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); + const int bigramFlags = createAndGetBigramFlags(probability, hasNext); + if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, + Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { + return false; + } + return bigramListBuffer->writeUintAndAdvancePosition(targetTerminalId, + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos); +} + +bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) { + bool hasNext = true; + int readingPos = bigramListPos; + int writingPos = toPos; + while(hasNext) { + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + getBigramEntryAndAdvancePosition(&probability, &hasNext, &targetTerminalId, + &readingPos); + if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, + &writingPos)) { + return false; + } + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index 5eed13e70..a112667e2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -33,21 +33,11 @@ class BigramDictContent : public SparseTableDictContent { Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE) {} - void getBigramEntryAndAdvancePosition(int *const outBigramFlags, - int *const outTargetTerminalId, int *const bigramEntryPos) const { - const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); - if (outBigramFlags) { - *outBigramFlags = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); - } - if (outTargetTerminalId) { - *outTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( - Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); - } - } + void getBigramEntryAndAdvancePosition(int *const outProbability, bool *const outHasNext, + int *const outTargetTerminalId, int *const bigramEntryPos) const; - // Returns head position of bigram list for a PtNode specified by terminalId. - int getBigramListHeadPos(const int terminalId) const { + // Returns head position of bigram list for a PtNode specified by terminalId. + int getBigramListHeadPos(const int terminalId) const { const SparseTable *const addressLookupTable = getAddressLookupTable(); if (!addressLookupTable->contains(terminalId)) { return NOT_A_DICT_POS; @@ -55,8 +45,23 @@ class BigramDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } + bool writeBigramEntryAndAdvancePosition(const int probability, const int hasNext, + const int targetTerminalId, int *const entryWritingPos); + + bool createNewBigramList(const int terminalId) { + const int bigramListPos = getContentBuffer()->getTailPosition(); + return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); + } + + bool copyBigramList(const int bigramListPos, const int toPos); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictContent); + + int createAndGetBigramFlags(const int probability, const bool hasNext) const { + return (probability & Ver4DictConstants::BIGRAM_PROBABILITY_MASK) + | (hasNext ? Ver4DictConstants::BIGRAM_HAS_NEXT_MASK : 0); + } }; } // namespace latinime #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 71868e9ca..fe9efe874 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -59,10 +59,18 @@ class SparseTableDictContent : public DictContent { } protected: + SparseTable *getUpdatableAddressLookupTable() { + return &mAddressLookupTable; + } + const SparseTable *getAddressLookupTable() const { return &mAddressLookupTable; } + BufferWithExtendableBuffer *getWritableContentBuffer() { + return &mExpandableContentBuffer; + } + const BufferWithExtendableBuffer *getContentBuffer() const { return &mExpandableContentBuffer; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 6476478e5..b9e56a254 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -70,6 +70,10 @@ class Ver4DictBuffers { return &mProbabilityDictContent; } + AK_FORCE_INLINE BigramDictContent *getUpdatableBigramDictContent() { + return &mBigramDictContent; + } + AK_FORCE_INLINE const BigramDictContent *getBigramDictContent() const { return &mBigramDictContent; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 941bcd594..2b113e38b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -43,6 +43,8 @@ const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3; const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1; +const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F; +const int Ver4DictConstants::BIGRAM_HAS_NEXT_MASK = 0x80; const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 7270d9e6e..a1830dd0c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -47,6 +47,8 @@ class Ver4DictConstants { static const int BIGRAM_FLAGS_FIELD_SIZE; static const int BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; + static const int BIGRAM_PROBABILITY_MASK; + static const int BIGRAM_HAS_NEXT_MASK; static const int SHORTCUT_FLAGS_FIELD_SIZE; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp index 8b0ea823e..7c66a6691 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -16,7 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" -#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" @@ -167,8 +167,6 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { return false; } - // TODO: Implement bigram and shortcut writing. - // Create node flags and write them. PatriciaTrieReadingUtils::NodeFlags nodeFlags = PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), @@ -188,8 +186,8 @@ bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, bool *const outAddedNewBigram) { - // TODO: Implement. - return false; + return mBigramPolicy->addNewEntry(sourcePtNodeParams->getTerminalId(), + targetPtNodeParam->getTerminalId(), probability, outAddedNewBigram); } bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 520ffc080..57201e4a8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -135,9 +135,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); return false; } - if (mDictBuffer.getTailPosition() - >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { - AKLOGE("The dictionary is too large to dynamically update."); + if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer.getTailPosition()); return false; } DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); @@ -156,8 +156,34 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, const int *const word1, const int length1, const int probability) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer.getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d", + mDictBuffer.getTailPosition()); + return false; + } + const int word0Pos = getTerminalPtNodePositionOfWord(word0, length0, + false /* forceLowerCaseSearch */); + if (word0Pos == NOT_A_DICT_POS) { + return false; + } + const int word1Pos = getTerminalPtNodePositionOfWord(word1, length1, + false /* forceLowerCaseSearch */); + if (word1Pos == NOT_A_DICT_POS) { + return false; + } + bool addedNewBigram = false; + if (mUpdatingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) { + if (addedNewBigram) { + mBigramCount++; + } + return true; + } else { + return false; + } } bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index fdb7ac69b..93cc4b7a2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -42,7 +42,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mDictBuffer(mBuffers.get()->getRawDictBuffer() + mHeaderPolicy.getSize(), mBuffers.get()->getRawDictBufferSize() - mHeaderPolicy.getSize(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), - mBigramPolicy(mBuffers.get()->getBigramDictContent(), + mBigramPolicy(mBuffers.get()->getUpdatableBigramDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp index f17a0d1c0..26eafcd44 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.cpp @@ -49,6 +49,11 @@ void BufferWithExtendableBuffer::readCodePointsAndAdvancePosition(const int maxC } } +bool BufferWithExtendableBuffer::writeUint(const uint32_t data, const int size, const int pos) { + int writingPos = pos; + return writeUintAndAdvancePosition(data, size, &writingPos); +} + bool BufferWithExtendableBuffer::writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos) { if (!(size >= 1 && size <= 4)) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h index 13dce9b61..ee6107ad7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h @@ -93,6 +93,8 @@ class BufferWithExtendableBuffer { * Writing is allowed for original buffer, already written region of additional buffer and the * tail of additional buffer. */ + bool writeUint(const uint32_t data, const int size, const int pos); + bool writeUintAndAdvancePosition(const uint32_t data, const int size, int *const pos); bool writeCodePointsAndAdvancePosition(const int *const codePoints, const int codePointCount, diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp index 2678b8c7b..9be35620c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.cpp @@ -19,23 +19,68 @@ namespace latinime { const int SparseTable::NOT_EXIST = -1; +const int SparseTable::INDEX_SIZE = 4; bool SparseTable::contains(const int id) const { - const int readingPos = id / mBlockSize * mDataSize; + const int readingPos = getPosInIndexTable(id); if (id < 0 || mIndexTableBuffer->getTailPosition() <= readingPos) { return false; } - const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, readingPos); return index != NOT_EXIST; } uint32_t SparseTable::get(const int id) const { - const int indexTableIndex = id / mBlockSize; - int readingPos = indexTableIndex * mDataSize; - const int index = mIndexTableBuffer->readUint(mDataSize, readingPos); + const int indexTableReadingPos = getPosInIndexTable(id); + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, indexTableReadingPos); + const int contentTableReadingPos = getPosInContentTable(id, index); + return mContentTableBuffer->readUint(mDataSize, contentTableReadingPos); +} + +bool SparseTable::set(const int id, const uint32_t value) { + const int posInIndexTable = getPosInIndexTable(id); + // Extends the index table if needed. + if (mIndexTableBuffer->getTailPosition() < posInIndexTable) { + int tailPos = mIndexTableBuffer->getTailPosition(); + while(tailPos < posInIndexTable) { + if (!mIndexTableBuffer->writeUintAndAdvancePosition(NOT_EXIST, INDEX_SIZE, &tailPos)) { + return false; + } + } + } + if (contains(id)) { + // The entry is already in the content table. + const int index = mIndexTableBuffer->readUint(INDEX_SIZE, posInIndexTable); + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); + } + // The entry is not in the content table. + // Create new entry in the content table. + const int index = getIndexFromContentTablePos(mContentTableBuffer->getTailPosition()); + if (!mIndexTableBuffer->writeUint(index, INDEX_SIZE, posInIndexTable)) { + return false; + } + // Write a new block that containing the entry to be set. + int writingPos = getPosInContentTable(0 /* id */, index); + for (int i = 0; i < mBlockSize; ++i) { + if (!mContentTableBuffer->writeUintAndAdvancePosition(NOT_A_DICT_POS, mDataSize, + &writingPos)) { + return false; + } + } + return mContentTableBuffer->writeUint(value, mDataSize, getPosInContentTable(id, index)); +} + +int SparseTable::getIndexFromContentTablePos(const int contentTablePos) const { + return contentTablePos / mDataSize / mBlockSize; +} + +int SparseTable::getPosInIndexTable(const int id) const { + return (id / mBlockSize) * INDEX_SIZE; +} + +int SparseTable::getPosInContentTable(const int id, const int index) const { const int offset = id % mBlockSize; - readingPos = (index * mDataSize + offset) * mBlockSize; - return mContentTableBuffer->readUint(mDataSize, readingPos); + return (index * mDataSize + offset) * mBlockSize; } } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h index d71756c63..21c167506 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/sparse_table.h @@ -38,10 +38,19 @@ class SparseTable { uint32_t get(const int id) const; + bool set(const int id, const uint32_t value); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTable); + int getIndexFromContentTablePos(const int contentTablePos) const; + + int getPosInIndexTable(const int id) const; + + int getPosInContentTable(const int id, const int index) const; + static const int NOT_EXIST; + static const int INDEX_SIZE; BufferWithExtendableBuffer *const mIndexTableBuffer; BufferWithExtendableBuffer *const mContentTableBuffer; diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index 85e6243e4..b7cef738b 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -206,4 +206,44 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertEquals(probability, binaryDictionary.getFrequency("y")); } + public void testWriteBigrams() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + + final int unigramProbability = 100; + final int bigramProbability = 10; + final int updatedBigramProbability = 15; + binaryDictionary.addUnigramWord("aaa", unigramProbability); + binaryDictionary.addUnigramWord("abb", unigramProbability); + binaryDictionary.addUnigramWord("bcc", unigramProbability); + binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); + binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); + binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); + binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); + + final int probability = binaryDictionary.calculateProbability(unigramProbability, + bigramProbability); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); + assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); + assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); + assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); + assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); + } }