Implement ver4 dictionary unigram writing methods.

Bug: 11073222
Change-Id: Ibdb6846fee98919bb5f845170c19d7d571fcb88d
This commit is contained in:
Keisuke Kuroyanagi 2013-11-07 17:58:27 +09:00
parent 956ca35157
commit 3fe9458b6d
11 changed files with 434 additions and 16 deletions

View file

@ -90,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
ver4_dict_constants.cpp \ ver4_dict_constants.cpp \
ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
ver4_patricia_trie_policy.cpp \ ver4_patricia_trie_policy.cpp \
ver4_patricia_trie_reading_utils.cpp ) \ ver4_patricia_trie_reading_utils.cpp ) \
$(addprefix suggest/policyimpl/dictionary/utils/, \ $(addprefix suggest/policyimpl/dictionary/utils/, \

View file

@ -38,6 +38,28 @@ class ProbabilityDictContent : public SingleDictContent {
return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId); return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
} }
/**
 * Stores the probability of the terminal identified by terminalId.
 *
 * A terminalId equal to the current size appends a new entry; in that case the flags field
 * of the entry is filled with a placeholder value before the probability is written.
 * Ids that are negative or larger than the current size are rejected.
 *
 * @return true if every required write succeeded, false otherwise.
 */
bool setProbability(const int terminalId, const int probability) {
    const bool isAcceptableId = terminalId >= 0 && terminalId <= getSize();
    if (!isAcceptableId) {
        return false;
    }
    // Each entry consists of a flags field followed by a probability field.
    const int entryHeadPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
            + Ver4DictConstants::PROBABILITY_SIZE);
    if (terminalId == getSize()) {
        // Appending a new entry: its flags field has to be written as well.
        const int placeholderFlags = 0;
        int flagsPos = entryHeadPos;
        const bool wroteFlags = getWritableBuffer()->writeUintAndAdvancePosition(
                placeholderFlags, Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagsPos);
        if (!wroteFlags) {
            return false;
        }
    }
    int probabilityPos = entryHeadPos + Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
    return getWritableBuffer()->writeUintAndAdvancePosition(probability,
            Ver4DictConstants::PROBABILITY_SIZE, &probabilityPos);
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent); DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);

View file

@ -47,10 +47,28 @@ class TerminalPositionLookupTable : public SingleDictContent {
readingPos) - mHeaderRegionSize; readingPos) - mHeaderRegionSize;
} }
/**
 * Records the position of the PtNode that owns the given terminal id.
 *
 * A terminalId equal to the current table size registers a new terminal id; the table size
 * grows only after the entry has actually been written. The stored value is
 * terminalPtNodePos shifted by the header region size.
 *
 * @param terminalId id of the terminal; must be in [0, current table size].
 * @param terminalPtNodePos position of the terminal PtNode.
 * @return true if the entry was written, false if the id is out of range or the write failed.
 */
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) {
    if (terminalId < 0 || terminalId > mSize) {
        // This function reports success as a bool. A position sentinel such as
        // NOT_A_DICT_POS would be implicitly converted to bool here (and read as success
        // if it is non-zero), so the failure is reported explicitly.
        return false;
    }
    int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
    if (!getWritableBuffer()->writeUintAndAdvancePosition(
            terminalPtNodePos + mHeaderRegionSize,
            Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos)) {
        // Leave mSize untouched so that a failed write does not consume a terminal id.
        return false;
    }
    if (terminalId == mSize) {
        // The entry for the new terminal id is in place; the id is now in use.
        mSize += 1;
    }
    return true;
}
// Returns the id to use for a newly added terminal, which is the current table size.
// setTerminalPtNodePosition() treats an id equal to the size as a new terminal id.
int getNextTerminalId() const {
return mSize;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable); DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
const int mSize; int mSize;
const int mHeaderRegionSize; const int mHeaderRegionSize;
}; };
} // namespace latinime } // namespace latinime

View file

@ -53,10 +53,19 @@ class Ver4DictBuffers {
return mDictBuffer.get()->getBufferSize(); return mDictBuffer.get()->getBufferSize();
} }
// Returns a mutable pointer to the terminal position lookup table held by this object.
// Use getTerminalPositionLookupTable() when read-only access is sufficient.
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable;
}
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const { AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
return &mTerminalPositionLookupTable; return &mTerminalPositionLookupTable;
} }
// Returns a mutable pointer to the probability content held by this object.
// Use getProbabilityDictContent() when read-only access is sufficient.
AK_FORCE_INLINE ProbabilityDictContent *getUpdatableProbabilityDictContent() {
return &mProbabilityDictContent;
}
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const { AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
return &mProbabilityDictContent; return &mProbabilityDictContent;
} }
@ -69,6 +78,10 @@ class Ver4DictBuffers {
return &mShortcutDictContent; return &mShortcutDictContent;
} }
// Returns the isUpdatable value these buffers were constructed with.
// NOTE(review): the backing member mIsUpdatable is declared as int, so the value is
// implicitly converted to bool here; consider declaring the member as bool.
AK_FORCE_INLINE bool isUpdatable() const {
return mIsUpdatable;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
@ -80,13 +93,15 @@ class Ver4DictBuffers {
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())), HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
mProbabilityDictContent(dictDirPath, isUpdatable), mProbabilityDictContent(dictDirPath, isUpdatable),
mBigramDictContent(dictDirPath, isUpdatable), mBigramDictContent(dictDirPath, isUpdatable),
mShortcutDictContent(dictDirPath, isUpdatable) {} mShortcutDictContent(dictDirPath, isUpdatable),
mIsUpdatable(isUpdatable) {}
const MmappedBuffer::MmappedBufferPtr mDictBuffer; const MmappedBuffer::MmappedBufferPtr mDictBuffer;
TerminalPositionLookupTable mTerminalPositionLookupTable; TerminalPositionLookupTable mTerminalPositionLookupTable;
ProbabilityDictContent mProbabilityDictContent; ProbabilityDictContent mProbabilityDictContent;
BigramDictContent mBigramDictContent; BigramDictContent mBigramDictContent;
ShortcutDictContent mShortcutDictContent; ShortcutDictContent mShortcutDictContent;
const int mIsUpdatable;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_VER4_DICT_BUFFER_H */ #endif /* LATINIME_VER4_DICT_BUFFER_H */

View file

@ -34,6 +34,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
const int Ver4DictConstants::PROBABILITY_SIZE = 1; const int Ver4DictConstants::PROBABILITY_SIZE = 1;
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1; const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4; const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4; const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;

View file

@ -38,6 +38,7 @@ class Ver4DictConstants {
static const int PROBABILITY_SIZE; static const int PROBABILITY_SIZE;
static const int FLAGS_IN_PROBABILITY_FILE_SIZE; static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE; static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
static const int TERMINAL_ID_FIELD_SIZE;
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE; static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE; static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;

View file

@ -0,0 +1,201 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
// Passed to PatriciaTrieReadingUtils::createAndGetFlags() as the children position field size
// when a PtNode is written. Presumably a size in bytes -- TODO confirm.
const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
const PtNodeParams *const toBeUpdatedPtNodeParams) {
int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mTrieBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
true /* isDeleted */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos);
}
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos) {
int pos = toBeUpdatedPtNodeParams->getHeadPos();
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
pos -= mTrieBuffer->getOriginalBufferSize();
}
// Read original flags
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
false /* isDeleted */);
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
// Update flags.
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
&writingPos)) {
return false;
}
// Update moved position, which is stored in the parent offset field.
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
return false;
}
// Update bigram linked node position, which is stored in the children position field.
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
mTrieBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
return false;
}
if (toBeUpdatedPtNodeParams->hasChildren()) {
// Update children's parent position.
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
while (!mReadingHelper.isEnd()) {
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
&parentOffsetFieldPos)) {
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
// we give up to update dictionary.
return false;
}
mReadingHelper.readNextSiblingNode(childPtNodeParams);
}
}
return true;
}
// Replaces the probability stored for the terminal of the given PtNode.
// Returns false for non-terminal PtNodes or when the probability cannot be written.
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
    if (toBeUpdatedPtNodeParams->isTerminal()) {
        // The probability is kept in the probability content, keyed by terminal id.
        const int terminalId = toBeUpdatedPtNodeParams->getTerminalId();
        return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
                terminalId, newProbability);
    }
    return false;
}
// Overwrites the children position field of the given PtNode with newChildrenPosition.
// Returns false if the field could not be written.
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
    int fieldWritingPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
    const bool written =
            DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
                    mTrieBuffer, newChildrenPosition, &fieldWritingPos);
    return written;
}
// Writes a PtNode described by ptNodeParams at *ptNodeWritingPos and advances the position.
//
// Fields are written in this order: placeholder flags, parent offset, code points, terminal
// id (terminal nodes only) and children position. For terminal nodes the terminal position
// lookup table and the probability content are updated as well. The real flags are written
// over the placeholder as the last step. Bigrams and shortcuts are not written yet.
// Returns false as soon as any write fails.
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
        const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
    // Remember the head of the PtNode; it is needed for the parent offset, the lookup table
    // and the final flag update.
    const int headPos = *ptNodeWritingPos;

    // Placeholder flags; the appropriate flags are written at the end.
    if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
            0 /* nodeFlags */, ptNodeWritingPos)) {
        return false;
    }
    // Parent offset, calculated from the parent position and the head of this PtNode.
    if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
            ptNodeParams->getParentPos(), headPos, ptNodeWritingPos)) {
        return false;
    }
    // Code points.
    if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
            ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(),
            ptNodeWritingPos)) {
        return false;
    }

    // Reuse the terminal id of the params when there is one. Otherwise, a valid probability
    // means this is a new terminal, which gets a new unused terminal id.
    int terminalId = ptNodeParams->getTerminalId();
    if (terminalId == Ver4DictConstants::NOT_A_TERMINAL_ID
            && ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
        terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
    }
    const bool isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;

    if (isTerminal) {
        // Point the lookup table entry of the terminal id at this PtNode.
        const bool updatedLookupTable =
                mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
                        terminalId, headPos);
        if (!updatedLookupTable) {
            return false;
        }
        // Terminal id field of the PtNode.
        const bool wroteTerminalId = mTrieBuffer->writeUintAndAdvancePosition(terminalId,
                Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos);
        if (!wroteTerminalId) {
            return false;
        }
        // Probability, stored in the probability content.
        const bool wroteProbability =
                mBuffers->getUpdatableProbabilityDictContent()->setProbability(
                        terminalId, ptNodeParams->getProbability());
        if (!wroteProbability) {
            return false;
        }
    }

    // Children position.
    if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
            ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
        return false;
    }

    // TODO: Implement bigram and shortcut writing.
    // Build the actual flags and write them over the placeholder at the head of the PtNode.
    const bool hasMultipleChars = ptNodeParams->getCodePointCount() > 1;
    PatriciaTrieReadingUtils::NodeFlags nodeFlags =
            PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
                    ptNodeParams->isNotAWord(), isTerminal,
                    false /* hasShortcutTargets */, false /* hasBigrams */,
                    hasMultipleChars, CHILDREN_POSITION_FIELD_SIZE);
    int flagsFieldPos = headPos;
    return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
            nodeFlags, &flagsFieldPos);
}
// Not implemented yet: always returns false, ignores all arguments and never writes to
// outAddedNewBigram.
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram) {
// TODO: Implement.
return false;
}
// Not implemented yet: always returns false and ignores both arguments.
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
// TODO: Implement.
return false;
}
}

View file

@ -0,0 +1,84 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
#include <stdint.h>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
namespace latinime {
class BufferWithExtendableBuffer;
class Ver4BigramListPolicy;
class Ver4DictBuffers;
class Ver4ShortcutListPolicy;
/*
 * This class helps to write the nodes (PtNodes) of a ver4 patricia trie.
 *
 * It implements the PtNodeWriter interface. Trie fields are written to the trie buffer,
 * while terminal positions and probabilities are written through Ver4DictBuffers.
 */
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
public:
// All pointers are stored as-is; nothing is copied or deleted by this class.
// mReadingHelper is constructed from mTrieBuffer and mPtNodeReader, which are declared
// before it below and are therefore already initialized at that point.
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
mReadingHelper(mTrieBuffer, mPtNodeReader),
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
virtual ~Ver4PatriciaTrieNodeWriter() {}
// Rewrites the flags of the PtNode so that it is marked as deleted.
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
// Marks the PtNode as moved: movedPos goes into its parent offset field and
// bigramLinkedNodePos into its children position field; the parent offsets of its children
// are rewritten as well.
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int movedPos, const int bigramLinkedNodePos);
// Updates the probability of a terminal PtNode; returns false for non-terminal PtNodes.
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newProbability);
// Overwrites the children position field of the PtNode.
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
const int newChildrenPosition);
// Writes a whole PtNode at *ptNodeWritingPos and advances the position past the written
// fields.
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
int *const ptNodeWritingPos);
// Not implemented yet; always returns false.
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam, const int probability,
bool *const outAddedNewBigram);
// Not implemented yet; always returns false.
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
const PtNodeParams *const targetPtNodeParam);
private:
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
// Children position field size that is passed when node flags are created.
static const int CHILDREN_POSITION_FIELD_SIZE;
// Buffer that holds the trie itself.
BufferWithExtendableBuffer *const mTrieBuffer;
// Gives access to the terminal position lookup table and the probability content.
Ver4DictBuffers *const mBuffers;
const Ver4PatriciaTrieNodeReader *const mPtNodeReader;
// Used to iterate over child PtNodes in markPtNodeAsMoved().
DynamicPatriciaTrieReadingHelper mReadingHelper;
// The bigram and shortcut policies are stored for the writing that is still to be
// implemented.
Ver4BigramListPolicy *const mBigramPolicy;
Ver4ShortcutListPolicy *const mShortcutPolicy;
};
} // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */

View file

@ -19,12 +19,17 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime { namespace latinime {
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const { DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
@ -126,8 +131,27 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability) { const int probability) {
// TODO: Implement. if (!mBuffers.get()->isUpdatable()) {
return false; AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
if (mDictBuffer.getTailPosition()
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
AKLOGE("The dictionary is too large to dynamically update.");
return false;
}
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
&addedNewUnigram)) {
if (addedNewUnigram) {
mUnigramCount++;
}
return true;
} else {
return false;
}
} }
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0, bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,

View file

@ -22,8 +22,10 @@
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime { namespace latinime {
@ -44,7 +46,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(), mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
mBuffers.get()->getTerminalPositionLookupTable()), mBuffers.get()->getTerminalPositionLookupTable()),
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {}; mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
&mShortcutPolicy),
mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()) {};
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
@ -100,12 +108,21 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers; // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy mHeaderPolicy; const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mDictBuffer; BufferWithExtendableBuffer mDictBuffer;
const Ver4BigramListPolicy mBigramPolicy; Ver4BigramListPolicy mBigramPolicy;
const Ver4ShortcutListPolicy mShortcutPolicy; Ver4ShortcutListPolicy mShortcutPolicy;
Ver4PatriciaTrieNodeReader mNodeReader; Ver4PatriciaTrieNodeReader mNodeReader;
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
int mUnigramCount;
int mBigramCount;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H

View file

@ -78,7 +78,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
getDictionaryOptions(TEST_LOCALE, dictVersion)); getDictionaryOptions(TEST_LOCALE, dictVersion));
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
try { try {
encoder.writeDictionary(dict, FORMAT_OPTIONS); encoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) { } catch (IOException e) {
@ -104,7 +104,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
dict.add("aaa", frequency, null, false /* isNotAWord */); dict.add("aaa", frequency, null, false /* isNotAWord */);
dict.add("ab", frequency, null, false /* isNotAWord */); dict.add("ab", frequency, null, false /* isNotAWord */);
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
try { try {
encoder.writeDictionary(dict, FORMAT_OPTIONS); encoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) { } catch (IOException e) {
@ -112,8 +112,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e); Log.e(TAG, "Unsupported format", e);
} }
File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */, 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertTrue(binaryDictionary.isValidDictionary()); assertTrue(binaryDictionary.isValidDictionary());
@ -122,7 +122,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
assertEquals(frequency, binaryDictionary.getFrequency("ab")); assertEquals(frequency, binaryDictionary.getFrequency("ab"));
} }
public static int getCalculatedBigramProbabiliy(BinaryDictionary binaryDictionary, public static int getCalculatedBigramProbabiliy(final BinaryDictionary binaryDictionary,
final int unigramFrequency, final int bigramFrequency) { final int unigramFrequency, final int bigramFrequency) {
final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff( final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff(
unigramFrequency, bigramFrequency); unigramFrequency, bigramFrequency);
@ -146,7 +146,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
dict.setBigram("a", "ab", bigramFrequency1); dict.setBigram("a", "ab", bigramFrequency1);
dict.setBigram("aaa", "ab", bigramFrequency2); dict.setBigram("aaa", "ab", bigramFrequency2);
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
try { try {
encoder.writeDictionary(dict, FORMAT_OPTIONS); encoder.writeDictionary(dict, FORMAT_OPTIONS);
} catch (IOException e) { } catch (IOException e) {
@ -154,8 +154,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
Log.e(TAG, "Unsupported format", e); Log.e(TAG, "Unsupported format", e);
} }
File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */, 0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
@ -172,4 +172,38 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
assertFalse(binaryDictionary.isValidBigram("ab", "a")); assertFalse(binaryDictionary.isValidBigram("ab", "a"));
assertFalse(binaryDictionary.isValidBigram("ab", "aaa")); assertFalse(binaryDictionary.isValidBigram("ab", "aaa"));
} }
// TODO: Add large tests.
/**
 * Writes an empty ver4 dictionary, opens it as an updatable binary dictionary, adds several
 * unigrams and checks that each of them is reported with the probability it was added with.
 */
public void testWriteUnigrams() {
    final String version = Long.toString(System.currentTimeMillis());
    final FusionDictionary emptyDict = new FusionDictionary(new PtNodeArray(),
            getDictionaryOptions(TEST_LOCALE, version));
    final DictEncoder dictEncoder = new Ver4DictEncoder(getContext().getCacheDir());
    try {
        dictEncoder.writeDictionary(emptyDict, FORMAT_OPTIONS);
    } catch (IOException e) {
        Log.e(TAG, "IOException while writing dictionary", e);
    } catch (UnsupportedFormatException e) {
        Log.e(TAG, "Unsupported format", e);
    }

    final File dictFile = getTrieFile(TEST_LOCALE, version);
    final BinaryDictionary updatableDict = new BinaryDictionary(dictFile.getAbsolutePath(),
            0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
            Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
    assertTrue(updatableDict.isValidDictionary());

    final int probability = 100;
    final String[] words = { "aaa", "abc", "bcd", "x", "y" };
    // Add every word first, then verify all of them, so that later insertions cannot hide
    // damage done to earlier entries.
    for (final String word : words) {
        updatableDict.addUnigramWord(word, probability);
    }
    for (final String word : words) {
        assertEquals(probability, updatableDict.getFrequency(word));
    }
}
} }