Implement ver4 dictionary unigram writing methods.
Bug: 11073222 Change-Id: Ibdb6846fee98919bb5f845170c19d7d571fcb88d
This commit is contained in:
parent
956ca35157
commit
3fe9458b6d
11 changed files with 434 additions and 16 deletions
|
@ -90,6 +90,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
ver4_dict_constants.cpp \
|
ver4_dict_constants.cpp \
|
||||||
ver4_patricia_trie_node_reader.cpp \
|
ver4_patricia_trie_node_reader.cpp \
|
||||||
|
ver4_patricia_trie_node_writer.cpp \
|
||||||
ver4_patricia_trie_policy.cpp \
|
ver4_patricia_trie_policy.cpp \
|
||||||
ver4_patricia_trie_reading_utils.cpp ) \
|
ver4_patricia_trie_reading_utils.cpp ) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||||
|
|
|
@ -38,6 +38,28 @@ class ProbabilityDictContent : public SingleDictContent {
|
||||||
return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
|
return Ver4PatriciaTrieReadingUtils::getProbability(getBuffer(), terminalId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool setProbability(const int terminalId, const int probability) {
|
||||||
|
if (terminalId < 0 || terminalId > getSize()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (terminalId == getSize()) {
|
||||||
|
// Write new entry.
|
||||||
|
int flagWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||||
|
+ Ver4DictConstants::PROBABILITY_SIZE);
|
||||||
|
const int dummyFlags = 0;
|
||||||
|
// Write dummy flags.
|
||||||
|
if (!getWritableBuffer()->writeUintAndAdvancePosition(dummyFlags,
|
||||||
|
Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE, &flagWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int probabilityWritingPos = terminalId * (Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE
|
||||||
|
+ Ver4DictConstants::PROBABILITY_SIZE)
|
||||||
|
+ Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
|
return getWritableBuffer()->writeUintAndAdvancePosition(probability,
|
||||||
|
Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ProbabilityDictContent);
|
||||||
|
|
||||||
|
|
|
@ -47,10 +47,28 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
||||||
readingPos) - mHeaderRegionSize;
|
readingPos) - mHeaderRegionSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool setTerminalPtNodePosition(const int terminalId, const int terminalPtNodePos) {
|
||||||
|
if (terminalId < 0 || terminalId > mSize) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
if (terminalId == mSize) {
|
||||||
|
// Use new terminal id.
|
||||||
|
mSize += 1;
|
||||||
|
}
|
||||||
|
int writingPos = terminalId * Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
||||||
|
return getWritableBuffer()->writeUintAndAdvancePosition(
|
||||||
|
terminalPtNodePos + mHeaderRegionSize,
|
||||||
|
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE, &writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns mSize, i.e. the terminal id that setTerminalPtNodePosition() accepts as a new entry.
int getNextTerminalId() const {
|
||||||
|
return mSize;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalPositionLookupTable);
|
||||||
|
|
||||||
const int mSize;
|
int mSize;
|
||||||
const int mHeaderRegionSize;
|
const int mHeaderRegionSize;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -53,10 +53,19 @@ class Ver4DictBuffers {
|
||||||
return mDictBuffer.get()->getBufferSize();
|
return mDictBuffer.get()->getBufferSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Non-const accessor: returns a mutable pointer to the terminal position lookup table member.
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
|
||||||
|
return &mTerminalPositionLookupTable;
|
||||||
|
}
|
||||||
|
|
||||||
// Read-only accessor: returns a const pointer to the terminal position lookup table member.
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
|
AK_FORCE_INLINE const TerminalPositionLookupTable *getTerminalPositionLookupTable() const {
|
||||||
return &mTerminalPositionLookupTable;
|
return &mTerminalPositionLookupTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Non-const accessor: returns a mutable pointer to the probability content member.
AK_FORCE_INLINE ProbabilityDictContent *getUpdatableProbabilityDictContent() {
|
||||||
|
return &mProbabilityDictContent;
|
||||||
|
}
|
||||||
|
|
||||||
// Read-only accessor: returns a const pointer to the probability content member.
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
|
AK_FORCE_INLINE const ProbabilityDictContent *getProbabilityDictContent() const {
|
||||||
return &mProbabilityDictContent;
|
return &mProbabilityDictContent;
|
||||||
}
|
}
|
||||||
|
@ -69,6 +78,10 @@ class Ver4DictBuffers {
|
||||||
return &mShortcutDictContent;
|
return &mShortcutDictContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the updatable flag that the constructor stored in mIsUpdatable.
AK_FORCE_INLINE bool isUpdatable() const {
|
||||||
|
return mIsUpdatable;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4DictBuffers);
|
||||||
|
|
||||||
|
@ -80,13 +93,15 @@ class Ver4DictBuffers {
|
||||||
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
HeaderReadWriteUtils::getHeaderSize(mDictBuffer.get()->getBuffer())),
|
||||||
mProbabilityDictContent(dictDirPath, isUpdatable),
|
mProbabilityDictContent(dictDirPath, isUpdatable),
|
||||||
mBigramDictContent(dictDirPath, isUpdatable),
|
mBigramDictContent(dictDirPath, isUpdatable),
|
||||||
mShortcutDictContent(dictDirPath, isUpdatable) {}
|
mShortcutDictContent(dictDirPath, isUpdatable),
|
||||||
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
const MmappedBuffer::MmappedBufferPtr mDictBuffer;
|
||||||
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
TerminalPositionLookupTable mTerminalPositionLookupTable;
|
||||||
ProbabilityDictContent mProbabilityDictContent;
|
ProbabilityDictContent mProbabilityDictContent;
|
||||||
BigramDictContent mBigramDictContent;
|
BigramDictContent mBigramDictContent;
|
||||||
ShortcutDictContent mShortcutDictContent;
|
ShortcutDictContent mShortcutDictContent;
|
||||||
|
// Updatable flag given to the constructor; a bool because isUpdatable() returns it as bool.
const bool mIsUpdatable;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_VER4_DICT_BUFFER_H */
|
#endif /* LATINIME_VER4_DICT_BUFFER_H */
|
||||||
|
|
|
@ -34,6 +34,7 @@ const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
|
||||||
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
const int Ver4DictConstants::PROBABILITY_SIZE = 1;
|
||||||
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
|
||||||
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
|
const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
|
||||||
|
const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
|
||||||
|
|
||||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
|
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
|
||||||
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
|
||||||
|
|
|
@ -38,6 +38,7 @@ class Ver4DictConstants {
|
||||||
static const int PROBABILITY_SIZE;
|
static const int PROBABILITY_SIZE;
|
||||||
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
static const int FLAGS_IN_PROBABILITY_FILE_SIZE;
|
||||||
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
static const int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE;
|
||||||
|
static const int TERMINAL_ID_FIELD_SIZE;
|
||||||
|
|
||||||
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
|
static const int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE;
|
||||||
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
static const int BIGRAM_ADDRESS_TABLE_DATA_SIZE;
|
||||||
|
|
|
@ -0,0 +1,201 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Passed as the last argument of PatriciaTrieReadingUtils::createAndGetFlags() when node flags
// are built. NOTE(review): presumably the children position field size in bytes - confirm.
const int Ver4PatriciaTrieNodeWriter::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
|
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsDeleted(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams) {
|
||||||
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mTrieBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
|
||||||
|
true /* isDeleted */);
|
||||||
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
// Update flags.
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
|
||||||
|
&writingPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Ver4PatriciaTrieNodeWriter::markPtNodeAsMoved(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int movedPos, const int bigramLinkedNodePos) {
|
||||||
|
int pos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
const bool usesAdditionalBuffer = mTrieBuffer->isInAdditionalBuffer(pos);
|
||||||
|
const uint8_t *const dictBuf = mTrieBuffer->getBuffer(usesAdditionalBuffer);
|
||||||
|
if (usesAdditionalBuffer) {
|
||||||
|
pos -= mTrieBuffer->getOriginalBufferSize();
|
||||||
|
}
|
||||||
|
// Read original flags
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags originalFlags =
|
||||||
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
|
||||||
|
const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
|
||||||
|
DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
|
||||||
|
false /* isDeleted */);
|
||||||
|
int writingPos = toBeUpdatedPtNodeParams->getHeadPos();
|
||||||
|
// Update flags.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, updatedFlags,
|
||||||
|
&writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update moved position, which is stored in the parent offset field.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
|
mTrieBuffer, movedPos, toBeUpdatedPtNodeParams->getHeadPos(), &writingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Update bigram linked node position, which is stored in the children position field.
|
||||||
|
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
|
||||||
|
mTrieBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (toBeUpdatedPtNodeParams->hasChildren()) {
|
||||||
|
// Update children's parent position.
|
||||||
|
mReadingHelper.initWithPtNodeArrayPos(toBeUpdatedPtNodeParams->getChildrenPos());
|
||||||
|
while (!mReadingHelper.isEnd()) {
|
||||||
|
const PtNodeParams childPtNodeParams(mReadingHelper.getPtNodeParams());
|
||||||
|
int parentOffsetFieldPos = childPtNodeParams.getHeadPos()
|
||||||
|
+ DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
|
||||||
|
mTrieBuffer, bigramLinkedNodePos, childPtNodeParams.getHeadPos(),
|
||||||
|
&parentOffsetFieldPos)) {
|
||||||
|
// Parent offset cannot be written because of a bug or a broken dictionary; thus,
|
||||||
|
// we give up to update dictionary.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mReadingHelper.readNextSiblingNode(childPtNodeParams);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability) {
|
||||||
|
if (!toBeUpdatedPtNodeParams->isTerminal()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||||
|
toBeUpdatedPtNodeParams->getTerminalId(), newProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Ver4PatriciaTrieNodeWriter::updateChildrenPosition(
|
||||||
|
const PtNodeParams *const toBeUpdatedPtNodeParams, const int newChildrenPosition) {
|
||||||
|
int childrenPosFieldPos = toBeUpdatedPtNodeParams->getChildrenPosFieldPos();
|
||||||
|
return DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||||
|
newChildrenPosition, &childrenPosFieldPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(
|
||||||
|
const PtNodeParams *const ptNodeParams, int *const ptNodeWritingPos) {
|
||||||
|
const int nodePos = *ptNodeWritingPos;
|
||||||
|
// Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
|
||||||
|
// PtNode writing.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer,
|
||||||
|
0 /* nodeFlags */, ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Calculate a parent offset and write the offset.
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mTrieBuffer,
|
||||||
|
ptNodeParams->getParentPos(), nodePos, ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write code points
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(mTrieBuffer,
|
||||||
|
ptNodeParams->getCodePoints(), ptNodeParams->getCodePointCount(), ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
if (ptNodeParams->getTerminalId() != Ver4DictConstants::NOT_A_TERMINAL_ID) {
|
||||||
|
terminalId = ptNodeParams->getTerminalId();
|
||||||
|
} else if (ptNodeParams->getProbability() != NOT_A_PROBABILITY) {
|
||||||
|
// Write terminal information using a new terminal id.
|
||||||
|
// Get a new unused terminal id.
|
||||||
|
terminalId = mBuffers->getTerminalPositionLookupTable()->getNextTerminalId();
|
||||||
|
}
|
||||||
|
const int isTerminal = terminalId != Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
if (isTerminal) {
|
||||||
|
// Update the lookup table.
|
||||||
|
if (!mBuffers->getUpdatableTerminalPositionLookupTable()->setTerminalPtNodePosition(
|
||||||
|
terminalId, nodePos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write terminal Id.
|
||||||
|
if (!mTrieBuffer->writeUintAndAdvancePosition(terminalId,
|
||||||
|
Ver4DictConstants::TERMINAL_ID_FIELD_SIZE, ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Write probability.
|
||||||
|
if (!mBuffers->getUpdatableProbabilityDictContent()->setProbability(
|
||||||
|
terminalId, ptNodeParams->getProbability())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Write children position
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mTrieBuffer,
|
||||||
|
ptNodeParams->getChildrenPos(), ptNodeWritingPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// TODO: Implement bigram and shortcut writing.
|
||||||
|
|
||||||
|
// Create node flags and write them.
|
||||||
|
PatriciaTrieReadingUtils::NodeFlags nodeFlags =
|
||||||
|
PatriciaTrieReadingUtils::createAndGetFlags(ptNodeParams->isBlacklisted(),
|
||||||
|
ptNodeParams->isNotAWord(), isTerminal,
|
||||||
|
false /* hasShortcutTargets */, false /* hasBigrams */,
|
||||||
|
ptNodeParams->getCodePointCount() > 1 /* hasMultipleChars */,
|
||||||
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
|
int flagsFieldPos = nodePos;
|
||||||
|
if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mTrieBuffer, nodeFlags,
|
||||||
|
&flagsFieldPos)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Placeholder for adding a bigram entry. Not implemented yet: always returns false and never
// writes to *outAddedNewBigram.
bool Ver4PatriciaTrieNodeWriter::addNewBigramEntry(
|
||||||
|
const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
|
bool *const outAddedNewBigram) {
|
||||||
|
// TODO: Implement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Placeholder for removing a bigram entry. Not implemented yet: always returns false.
bool Ver4PatriciaTrieNodeWriter::removeBigramEntry(
|
||||||
|
const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam) {
|
||||||
|
// TODO: Implement.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
|
||||||
|
#define LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BufferWithExtendableBuffer;
|
||||||
|
class Ver4BigramListPolicy;
|
||||||
|
class Ver4DictBuffers;
|
||||||
|
class Ver4ShortcutListPolicy;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This class helps to write the nodes of a ver4 patricia trie.
|
||||||
|
*/
|
||||||
|
// PtNodeWriter implementation for ver4 dictionaries. Node data is written to the trie buffer;
// terminal positions and probabilities are written through the Ver4DictBuffers contents.
class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
|
||||||
|
public:
|
||||||
|
// Keeps the given pointers as members and constructs mReadingHelper from the trie buffer
// and the node reader.
// NOTE(review): no ownership is taken here as far as this class shows - confirm lifetimes.
Ver4PatriciaTrieNodeWriter(BufferWithExtendableBuffer *const trieBuffer,
|
||||||
|
Ver4DictBuffers *const buffers, const Ver4PatriciaTrieNodeReader *const ptNodeReader,
|
||||||
|
Ver4BigramListPolicy *const bigramPolicy, Ver4ShortcutListPolicy *const shortcutPolicy)
|
||||||
|
: mTrieBuffer(trieBuffer), mBuffers(buffers), mPtNodeReader(ptNodeReader),
|
||||||
|
mReadingHelper(mTrieBuffer, mPtNodeReader),
|
||||||
|
mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
||||||
|
|
||||||
|
virtual ~Ver4PatriciaTrieNodeWriter() {}
|
||||||
|
|
||||||
|
// Rewrites the node's flags with the deleted state set.
virtual bool markPtNodeAsDeleted(const PtNodeParams *const toBeUpdatedPtNodeParams);
|
||||||
|
|
||||||
|
// Rewrites the node's flags with the moved state set, stores movedPos in its parent offset
// field and bigramLinkedNodePos in its children position field, and updates the parent offset
// of its children.
virtual bool markPtNodeAsMoved(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
|
||||||
|
// Stores newProbability for the node's terminal id; returns false for non-terminal nodes.
virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int newProbability);
|
||||||
|
|
||||||
|
// Overwrites the node's children position field with newChildrenPosition.
virtual bool updateChildrenPosition(const PtNodeParams *const toBeUpdatedPtNodeParams,
|
||||||
|
const int newChildrenPosition);
|
||||||
|
|
||||||
|
// Writes a whole PtNode at *ptNodeWritingPos and advances the position.
virtual bool writePtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
|
||||||
|
int *const ptNodeWritingPos);
|
||||||
|
|
||||||
|
// Not implemented yet in the accompanying .cpp: always returns false.
virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam, const int probability,
|
||||||
|
bool *const outAddedNewBigram);
|
||||||
|
|
||||||
|
// Not implemented yet in the accompanying .cpp: always returns false.
virtual bool removeBigramEntry(const PtNodeParams *const sourcePtNodeParams,
|
||||||
|
const PtNodeParams *const targetPtNodeParam);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeWriter);
|
||||||
|
|
||||||
|
// Passed to PatriciaTrieReadingUtils::createAndGetFlags() when node flags are created.
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
|
|
||||||
|
// Buffer that PtNode fields are written to.
BufferWithExtendableBuffer *const mTrieBuffer;
|
||||||
|
// Provides the terminal position lookup table and the probability content.
Ver4DictBuffers *const mBuffers;
|
||||||
|
const Ver4PatriciaTrieNodeReader *const mPtNodeReader;
|
||||||
|
// Used to iterate over child PtNodes when a node is marked as moved.
DynamicPatriciaTrieReadingHelper mReadingHelper;
|
||||||
|
// NOTE(review): the bigram and shortcut policies are stored for bigram/shortcut writing,
// which is still marked TODO in the .cpp.
Ver4BigramListPolicy *const mBigramPolicy;
|
||||||
|
Ver4ShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_VER4_PATRICIA_TRIE_NODE_WRITER_H */
|
|
@ -19,12 +19,17 @@
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
|
||||||
|
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
|
@ -126,8 +131,27 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
|
||||||
|
|
||||||
// Adds a unigram word to the dictionary.
//
// Returns false (after logging) when the dictionary buffers are not updatable, or when the
// tail position of mDictBuffer has reached MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS.
// Otherwise the word is added through mUpdatingHelper, starting from the root position, and
// mUnigramCount is incremented when the helper reports that a new unigram was added.
// Returns true exactly when the helper call succeeded.
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
|
||||||
const int probability) {
|
const int probability) {
|
||||||
// TODO: Implement.
|
if (!mBuffers.get()->isUpdatable()) {
|
||||||
return false;
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (mDictBuffer.getTailPosition()
|
||||||
|
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
DynamicPatriciaTrieReadingHelper readingHelper(&mDictBuffer, &mNodeReader);
|
||||||
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
|
bool addedNewUnigram = false;
|
||||||
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
||||||
|
&addedNewUnigram)) {
|
||||||
|
// Count the word only when it was not in the dictionary before.
if (addedNewUnigram) {
|
||||||
|
mUnigramCount++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
bool Ver4PatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
|
||||||
|
|
|
@ -22,8 +22,10 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_updating_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -44,7 +46,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
mShortcutPolicy(mBuffers.get()->getShortcutDictContent(),
|
||||||
mBuffers.get()->getTerminalPositionLookupTable()),
|
mBuffers.get()->getTerminalPositionLookupTable()),
|
||||||
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()) {};
|
mNodeReader(&mDictBuffer, mBuffers.get()->getProbabilityDictContent()),
|
||||||
|
mNodeWriter(&mDictBuffer, mBuffers.get(), &mNodeReader, &mBigramPolicy,
|
||||||
|
&mShortcutPolicy),
|
||||||
|
mUpdatingHelper(&mDictBuffer, &mNodeReader, &mNodeWriter,
|
||||||
|
mHeaderPolicy.isDecayingDict()),
|
||||||
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
|
mBigramCount(mHeaderPolicy.getBigramCount()) {};
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -100,12 +108,21 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
|
||||||
|
|
||||||
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
// When the dictionary size is near the maximum size, we have to refuse dynamic operations to
|
||||||
|
// prevent the dictionary from overflowing.
|
||||||
|
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
|
||||||
|
Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
BufferWithExtendableBuffer mDictBuffer;
|
BufferWithExtendableBuffer mDictBuffer;
|
||||||
const Ver4BigramListPolicy mBigramPolicy;
|
Ver4BigramListPolicy mBigramPolicy;
|
||||||
const Ver4ShortcutListPolicy mShortcutPolicy;
|
Ver4ShortcutListPolicy mShortcutPolicy;
|
||||||
Ver4PatriciaTrieNodeReader mNodeReader;
|
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||||
|
Ver4PatriciaTrieNodeWriter mNodeWriter;
|
||||||
|
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
|
||||||
|
int mUnigramCount;
|
||||||
|
int mBigramCount;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -78,7 +78,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
|
|
||||||
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
try {
|
try {
|
||||||
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -104,7 +104,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
dict.add("aaa", frequency, null, false /* isNotAWord */);
|
dict.add("aaa", frequency, null, false /* isNotAWord */);
|
||||||
dict.add("ab", frequency, null, false /* isNotAWord */);
|
dict.add("ab", frequency, null, false /* isNotAWord */);
|
||||||
|
|
||||||
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
try {
|
try {
|
||||||
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -112,8 +112,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
Log.e(TAG, "Unsupported format", e);
|
Log.e(TAG, "Unsupported format", e);
|
||||||
}
|
}
|
||||||
File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
assertTrue(binaryDictionary.isValidDictionary());
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
@ -122,7 +122,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
|
assertEquals(frequency, binaryDictionary.getFrequency("ab"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int getCalculatedBigramProbabiliy(BinaryDictionary binaryDictionary,
|
public static int getCalculatedBigramProbabiliy(final BinaryDictionary binaryDictionary,
|
||||||
final int unigramFrequency, final int bigramFrequency) {
|
final int unigramFrequency, final int bigramFrequency) {
|
||||||
final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff(
|
final int bigramFrequencyDiff = BinaryDictEncoderUtils.getBigramFrequencyDiff(
|
||||||
unigramFrequency, bigramFrequency);
|
unigramFrequency, bigramFrequency);
|
||||||
|
@ -146,7 +146,7 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
dict.setBigram("a", "ab", bigramFrequency1);
|
dict.setBigram("a", "ab", bigramFrequency1);
|
||||||
dict.setBigram("aaa", "ab", bigramFrequency2);
|
dict.setBigram("aaa", "ab", bigramFrequency2);
|
||||||
|
|
||||||
DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
try {
|
try {
|
||||||
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -154,8 +154,8 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
Log.e(TAG, "Unsupported format", e);
|
Log.e(TAG, "Unsupported format", e);
|
||||||
}
|
}
|
||||||
File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
@ -172,4 +172,38 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertFalse(binaryDictionary.isValidBigram("ab", "a"));
|
assertFalse(binaryDictionary.isValidBigram("ab", "a"));
|
||||||
assertFalse(binaryDictionary.isValidBigram("ab", "aaa"));
|
assertFalse(binaryDictionary.isValidBigram("ab", "aaa"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Add large tests.
|
||||||
|
public void testWriteUnigrams() {
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
getDictionaryOptions(TEST_LOCALE, dictVersion));
|
||||||
|
final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir());
|
||||||
|
try {
|
||||||
|
encoder.writeDictionary(dict, FORMAT_OPTIONS);
|
||||||
|
} catch (IOException e) {
|
||||||
|
Log.e(TAG, "IOException while writing dictionary", e);
|
||||||
|
} catch (UnsupportedFormatException e) {
|
||||||
|
Log.e(TAG, "Unsupported format", e);
|
||||||
|
}
|
||||||
|
final File trieFile = getTrieFile(TEST_LOCALE, dictVersion);
|
||||||
|
final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
|
||||||
|
final int probability = 100;
|
||||||
|
binaryDictionary.addUnigramWord("aaa", probability);
|
||||||
|
binaryDictionary.addUnigramWord("abc", probability);
|
||||||
|
binaryDictionary.addUnigramWord("bcd", probability);
|
||||||
|
binaryDictionary.addUnigramWord("x", probability);
|
||||||
|
binaryDictionary.addUnigramWord("y", probability);
|
||||||
|
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("abc"));
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("bcd"));
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("x"));
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("y"));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue