From 3fe9458b6d4a777d87d243734ff34de49cbda03a Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 7 Nov 2013 17:58:27 +0900 Subject: [PATCH] Implement ver4 dictionary unigram writing methods. Bug: 11073222 Change-Id: Ibdb6846fee98919bb5f845170c19d7d571fcb88d --- native/jni/Android.mk | 1 + .../v4/content/probability_dict_content.h | 22 ++ .../content/terminal_position_lookup_table.h | 20 +- .../structure/v4/ver4_dict_buffers.h | 17 +- .../structure/v4/ver4_dict_constants.cpp | 1 + .../structure/v4/ver4_dict_constants.h | 1 + .../v4/ver4_patricia_trie_node_writer.cpp | 201 ++++++++++++++++++ .../v4/ver4_patricia_trie_node_writer.h | 84 ++++++++ .../v4/ver4_patricia_trie_policy.cpp | 28 ++- .../structure/v4/ver4_patricia_trie_policy.h | 25 ++- .../latin/Ver4BinaryDictionaryTests.java | 50 ++++- 11 files changed, 434 insertions(+), 16 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 35fa352d9..a5757fd58 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -90,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ + ver4_patricia_trie_node_writer.cpp \ ver4_patricia_trie_policy.cpp \ ver4_patricia_trie_reading_utils.cpp ) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h index daaf08f61..e85bbe18e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ 
b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -38,6 +38,28 @@ class ProbabilityDictContent : public SingleDictContent { return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId); } + bool setProbability(const int terminalId, const int probability) { + if (terminalId < 0 || terminalId > getSize()) { + return false; + } + if (terminalId == getSize()) { + // Write new entry. + int flagWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE); + const int dummyFlags = 0; + // Write dummy flags. + if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags, + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagWritingPos)) { + return false; + } + } + int probabilityWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE + + Ver4DictConstants::PROBABILITY_SIZE) + + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE; + return getWritableBuffer()->writeUintAndAdvancePosition(probability, + Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos); + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index 173d0da05..f6ced31b4 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -47,10 +47,28 @@ class TerminalPositionLookupTable : public SingleDictContent { readingPos) - mHeaderRegionSize; } + bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) { + if (terminalId < 0 || terminalId > mSize) { + return false; + } + if (terminalId == mSize) { + // Use new terminal id. 
+ mSize += 1; + } + int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + return getWritableBuffer()->writeUintAndAdvancePosition( + terminalPtNodePos + mHeaderRegionSize, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos); + } + + int getNextTerminalId() const { + return mSize; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); - const int mSize; + int mSize; const int mHeaderRegionSize; }; } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 6ee6e63e4..6476478e5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -53,10 +53,19 @@ class Ver4DictBuffers { return mDictBuffer.get()->getBufferSize(); } + + AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() { + return &mTerminalPositionLookupTable; + } + AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { return &mTerminalPositionLookupTable; } + AK_FORCE_INLINE ProbabilityDictContent *getUpdatableProbabilityDictContent() { + return &mProbabilityDictContent; + } + AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { return &mProbabilityDictContent; } @@ -69,6 +78,10 @@ class Ver4DictBuffers { return &mShortcutDictContent; } + AK_FORCE_INLINE bool isUpdatable() const { + return mIsUpdatable; + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); @@ -80,13 +93,15 @@ class Ver4DictBuffers { HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), mProbabilityDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable), - mShortcutDictContent(dictDirPath, isUpdatable) {} + mShortcutDictContent(dictDirPath, isUpdatable), + 
mIsUpdatable(isUpdatable) {} const MmappedBuffer::MmappedBufferPtr mDictBuffer; TerminalPositionLookupTable mTerminalPositionLookupTable; ProbabilityDictContent mProbabilityDictContent; BigramDictContent mBigramDictContent; ShortcutDictContent mShortcutDictContent; + const bool mIsUpdatable; }; } // namespace latinime #endif /* LATINIME_VER4_DICT_BUFFER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp index 20adb927f..941bcd594 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp @@ -34,6 +34,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; +const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h index 522581873..7270d9e6e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h @@ -38,6 +38,7 @@ class Ver4DictConstants { static const int PROBABILITY_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE; static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; + static const int TERMINAL_ID_FIELD_SIZE; static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE; static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; diff --git 
a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp new file mode 100644 index 000000000..8b0ea823e --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" + +#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" +#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" + +namespace latinime { + +const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3; + +bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted( + const PtNodeParams *const toBeUpdatedPtNodeParams) { + int pos = 
toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mTrieBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, + true /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos); +} + +bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved( + const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos) { + int pos = toBeUpdatedPtNodeParams->getHeadPos(); + const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos); + const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer); + if (usesAdditionalBuffer) { + pos -= mTrieBuffer->getOriginalBufferSize(); + } + // Read original flags + const PatriciaTrieReadingUtils::NodeFlags originalFlags = + PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); + const PatriciaTrieReadingUtils::NodeFlags updatedFlags = + DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, + false /* isDeleted */); + int writingPos = toBeUpdatedPtNodeParams->getHeadPos(); + // Update flags. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags, + &writingPos)) { + return false; + } + // Update moved position, which is stored in the parent offset field. 
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) { + return false; + } + // Update bigram linked node position, which is stored in the children position field. + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( + mTrieBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { + return false; + } + if (toBeUpdatedPtNodeParams->hasChildren()) { + // Update children's parent position. + mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos()); + while (!mReadingHelper.isEnd()) { + const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams()); + int parentOffsetFieldPos = childPtNodeParams.getHeadPos() + + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; + if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( + mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(), + &parentOffsetFieldPos)) { + // Parent offset cannot be written because of a bug or a broken dictionary; thus, + // we give up to update dictionary. 
+ return false; + } + mReadingHelper.readNextSiblingNode(childPtNodeParams); + } + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) { + if (!toBeUpdatedPtNodeParams->isTerminal()) { + return false; + } + return mBuffers->getUpdatableProbabilityDictContent()->setProbability( + toBeUpdatedPtNodeParams->getTerminalId(), newProbability); +} + +bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition( + const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) { + int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos(); + return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, + newChildrenPosition, &childrenPosFieldPos); +} + +bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( + const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) { + const int nodePos = *ptNodeWritingPos; + // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the + // PtNode writing. + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, + 0 /* nodeFlags */, ptNodeWritingPos)) { + return false; + } + // Calculate a parent offset and write the offset. 
+ if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer, + ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) { + return false; + } + // Write code points. + if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer, + ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) { + return false; + } + int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) { + terminalId = ptNodeParams->getTerminalId(); + } else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) { + // Write terminal information using a new terminal id. + // Get a new unused terminal id. + terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId(); + } + const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID; + if (isTerminal) { + // Update the lookup table. + if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition( + terminalId, nodePos)) { + return false; + } + // Write terminal id. + if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId, + Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) { + return false; + } + // Write probability. + if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability( + terminalId, ptNodeParams->getProbability())) { + return false; + } + } + // Write children position. + if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer, + ptNodeParams->getChildrenPos(), ptNodeWritingPos)) { + return false; + } + // TODO: Implement bigram and shortcut writing. + + // Create node flags and write them. 
+ PatriciaTrieReadingUtils::NodeFlags nodeFlags = + PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(), + ptNodeParams->isNotAWord(), isTerminal, + false /* hasShortcutTargets */, false /* hasBigrams */, + ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */, + CHILDREN_POSITION_FIELD_SIZE); + int flagsFieldPos = nodePos; + if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags, + &flagsFieldPos)) { + return false; + } + return true; +} + +bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry( + const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const int probability, + bool *const outAddedNewBigram) { + // TODO: Implement. + return false; +} + +bool Ver4PatriciaTrieNodeWriter::removeBigramEntry( + const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) { + // TODO: Implement. + return false; +} + +} diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h new file mode 100644 index 000000000..d11952304 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H +#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H + +#include <stdint.h> + +#include "defines.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h" +#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" + +namespace latinime { + +class BufferWithExtendableBuffer; +class Ver4BigramListPolicy; +class Ver4DictBuffers; +class Ver4ShortcutListPolicy; + +/* + * This class is used to help write the nodes of a ver4 patricia trie. + */ +class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { + public: + Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer, + Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader, + Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy) + : mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader), + mReadingHelper(mTrieBuffer, mPtNodeReader), + mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {} + + virtual ~Ver4PatriciaTrieNodeWriter() {} + + virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams); + + virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int movedPos, const int bigramLinkedNodePos); + + virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newProbability); + + virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams, + const int newChildrenPosition); + + virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, + int *const ptNodeWritingPos); + + virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam, const 
int probability, + bool *const outAddedNewBigram); + + virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams, + const PtNodeParams *const targetPtNodeParam); + + private: + DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter); + + static const int CHILDREN_POSITION_FIELD_SIZE; + + BufferWithExtendableBuffer *const mTrieBuffer; + Ver4DictBuffers *const mBuffers; + const Ver4PatriciaTrieNodeReader *const mPtNodeReader; + DynamicPatriciaTrieReadingHelper mReadingHelper; + Ver4BigramListPolicy *const mBigramPolicy; + Ver4ShortcutListPolicy *const mShortcutPolicy; + +}; +} // namespace latinime +#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index ae5a094d1..520ffc080 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -19,12 +19,17 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" namespace latinime { +const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024; +const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS = + DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, DicNodeVector *const 
childDicNodes) const { if (!dicNode->hasChildren()) { @@ -126,8 +131,27 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, const int probability) { - // TODO: Implement. - return false; + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); + return false; + } + if (mDictBuffer.getTailPosition() + >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) { + AKLOGE("The dictionary is too large to dynamically update."); + return false; + } + DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader); + readingHelper.initWithPtNodeArrayPos(getRootPosition()); + bool addedNewUnigram = false; + if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, + &addedNewUnigram)) { + if (addedNewUnigram) { + mUnigramCount++; + } + return true; + } else { + return false; + } } bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 10b9125f0..fdb7ac69b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -22,8 +22,10 @@ #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" +#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" +#include 
"suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -44,7 +46,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { mBuffers.get()->getTerminalPositionLookupTable()), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mBuffers.get()->getTerminalPositionLookupTable()), - mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; + mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()), + mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy, + &mShortcutPolicy), + mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter, + mHeaderPolicy.isDecayingDict()), + mUnigramCount(mHeaderPolicy.getUnigramCount()), + mBigramCount(mHeaderPolicy.getBigramCount()) {}; AK_FORCE_INLINE int getRootPosition() const { return 0; @@ -100,12 +108,21 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { private: DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); - const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; + // When the dictionary size is near the maximum size, we have to refuse dynamic operations to + // prevent the dictionary from overflowing. 
+ static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS; + static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS; + + Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; const HeaderPolicy mHeaderPolicy; BufferWithExtendableBuffer mDictBuffer; - const Ver4BigramListPolicy mBigramPolicy; - const Ver4ShortcutListPolicy mShortcutPolicy; + Ver4BigramListPolicy mBigramPolicy; + Ver4ShortcutListPolicy mShortcutPolicy; Ver4PatriciaTrieNodeReader mNodeReader; + Ver4PatriciaTrieNodeWriter mNodeWriter; + DynamicPatriciaTrieUpdatingHelper mUpdatingHelper; + int mUnigramCount; + int mBigramCount; }; } // namespace latinime #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index e43335265..85e6243e4 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -78,7 +78,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), getDictionaryOptions(TEST_LOCALE, dictVersion)); - DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); try { encoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { @@ -104,7 +104,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { dict.add("aaa", frequency, null, false /* isNotAWord */); dict.add("ab", frequency, null, false /* isNotAWord */); - DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); try { encoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { @@ -112,8 +112,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { } catch (UnsupportedFormatException e) { 
Log.e(TAG, "Unsupported format", e); } - File trieFile = getTrieFile(TEST_LOCALE, dictVersion); - BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); assertTrue(binaryDictionary.isValidDictionary()); @@ -122,7 +122,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertEquals(frequency, binaryDictionary.getFrequency("ab")); } - public static int getCalculatedBigramProbabiliy(BinaryDictionary binaryDictionary, + public static int getCalculatedBigramProbabiliy(final BinaryDictionary binaryDictionary, final int unigramFrequency, final int bigramFrequency) { final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff( unigramFrequency, bigramFrequency); @@ -146,7 +146,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { dict.setBigram("a", "ab", bigramFrequency1); dict.setBigram("aaa", "ab", bigramFrequency2); - DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); try { encoder.writeDictionary(dict, FORMAT_OPTIONS); } catch (IOException e) { @@ -154,8 +154,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported format", e); } - File trieFile = getTrieFile(TEST_LOCALE, dictVersion); - BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* 
isUpdatable */); @@ -172,4 +172,38 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { assertFalse(binaryDictionary.isValidBigram("ab", "a")); assertFalse(binaryDictionary.isValidBigram("ab", "aaa")); } + + // TODO: Add large tests. + public void testWriteUnigrams() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), + getDictionaryOptions(TEST_LOCALE, dictVersion)); + final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + try { + encoder.writeDictionary(dict, FORMAT_OPTIONS); + } catch (IOException e) { + Log.e(TAG, "IOException while writing dictionary", e); + } catch (UnsupportedFormatException e) { + Log.e(TAG, "Unsupported format", e); + } + final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertTrue(binaryDictionary.isValidDictionary()); + + final int probability = 100; + binaryDictionary.addUnigramWord("aaa", probability); + binaryDictionary.addUnigramWord("abc", probability); + binaryDictionary.addUnigramWord("bcd", probability); + binaryDictionary.addUnigramWord("x", probability); + binaryDictionary.addUnigramWord("y", probability); + + assertEquals(probability, binaryDictionary.getFrequency("aaa")); + assertEquals(probability, binaryDictionary.getFrequency("abc")); + assertEquals(probability, binaryDictionary.getFrequency("bcd")); + assertEquals(probability, binaryDictionary.getFrequency("x")); + assertEquals(probability, binaryDictionary.getFrequency("y")); + } + }