Purge ver3 dictionary reading in native code.

Bug: 11073222
Change-Id: I52892516fc9b8f1efdddb175da1bc87cbc0c66f0
main
Keisuke Kuroyanagi 2013-12-02 21:56:00 +09:00
parent d1501ea789
commit 647ea07eb1
13 changed files with 1 additions and 868 deletions

View File

@ -81,8 +81,6 @@ LATIN_IME_CORE_SRC_FILES := \
patricia_trie_reading_utils.cpp) \ patricia_trie_reading_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \ $(addprefix suggest/policyimpl/dictionary/structure/v3/, \
dynamic_patricia_trie_gc_event_listeners.cpp \ dynamic_patricia_trie_gc_event_listeners.cpp \
dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_helper.cpp \ dynamic_patricia_trie_reading_helper.cpp \
dynamic_patricia_trie_reading_utils.cpp \ dynamic_patricia_trie_reading_utils.cpp \
dynamic_patricia_trie_updating_helper.cpp \ dynamic_patricia_trie_updating_helper.cpp \

View File

@ -38,7 +38,6 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
// Mask for attribute probability, stored on 4 bits inside the flags byte. // Mask for attribute probability, stored on 4 bits inside the flags byte.
const BigramListReadWriteUtils::BigramFlags const BigramListReadWriteUtils::BigramFlags
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F; BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( /* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags, const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
@ -91,92 +90,4 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
} }
} }
// Sets or clears the "has next" bit in the flags byte of the bigram entry at entryPos and writes
// the updated byte back to the same position. Returns the result of the buffer write.
/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
        BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
    const bool inAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
    // The raw buffer is indexed from its own start, so positions that fall in the additional
    // buffer are translated by the original buffer size before reading.
    int flagsReadPos = entryPos;
    if (inAdditionalBuffer) {
        flagsReadPos -= buffer->getOriginalBufferSize();
    }
    const BigramFlags currentFlags = ByteArrayUtils::readUint8AndAdvancePosition(
            buffer->getBuffer(inAdditionalBuffer), &flagsReadPos);
    const BigramFlags updatedFlags = hasNext
            ? (currentFlags | FLAG_ATTRIBUTE_HAS_NEXT)
            : (currentFlags & (~FLAG_ATTRIBUTE_HAS_NEXT));
    // Writing takes the untranslated position.
    int flagsWritePos = entryPos;
    return buffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &flagsWritePos);
}
// Builds the flags for a new bigram entry that will start at *writingPos and, if the flags could
// be created, writes the entry and advances *writingPos. Returns false when either step fails.
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
        BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
        const bool hasNext, int *const writingPos) {
    BigramFlags entryFlags;
    const bool flagsCreated = createAndGetBigramFlags(*writingPos, targetPos, probability,
            hasNext, &entryFlags);
    // Short-circuit: nothing is written when the flags could not be created.
    return flagsCreated && writeBigramEntry(buffer, entryFlags, targetPos, writingPos);
}
// Writes one bigram entry (flags byte followed by the absolute target offset) at *writingPos and
// advances *writingPos. The sign of the offset is recorded in FLAG_ATTRIBUTE_OFFSET_NEGATIVE; the
// width of the offset field is taken from the address type bits of the given flags.
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
        BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
        const int targetPtNodePos, int *const writingPos) {
    const int targetOffset = getBigramTargetOffset(targetPtNodePos, *writingPos);
    BigramFlags flagsWithSign = flags & ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
    if (targetOffset < 0) {
        flagsWithSign = flags | FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
    }
    const bool flagsWritten =
            bufferToWrite->writeUintAndAdvancePosition(flagsWithSign, 1 /* size */, writingPos);
    if (!flagsWritten) {
        return false;
    }
    const uint32_t offsetMagnitude = abs(targetOffset);
    const int offsetFieldSize = attributeAddressSize(flags);
    return bufferToWrite->writeUintAndAdvancePosition(offsetMagnitude, offsetFieldSize,
            writingPos);
}
// Returns true if the bigram entry is valid and put entry flags into out*.
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
const int targetPtNodePos, const int probability, const bool hasNext,
BigramFlags *const outBigramFlags) {
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
if (hasNext) {
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
}
const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
if (offset < 0) {
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
}
const uint32_t absOffest = abs(offset);
if ((absOffest >> 24) != 0) {
// Offset is too large.
return false;
} else if ((absOffest >> 16) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
} else if ((absOffest >> 8) != 0) {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
} else {
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
}
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
// writing.
// TODO: Remove following 2 lines and optimize memory space.
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
*outBigramFlags = flags;
return true;
}
// Returns the offset to store in a bigram entry at entryPos so that it points at targetPtNodePos.
// The offset is measured from the byte that follows the flags field. Two cases are mapped to
// dedicated constants: a missing target (NOT_A_DICT_POS) and an offset of exactly zero.
/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
        const int entryPos) {
    if (targetPtNodePos == NOT_A_DICT_POS) {
        return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
    }
    const int offsetFromFlagsEnd = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
    if (offsetFromFlagsEnd != 0) {
        return offsetFromFlagsEnd;
    }
    return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
}
} // namespace latinime } // namespace latinime

View File

@ -45,34 +45,6 @@ public:
// Bigrams reading methods // Bigrams reading methods
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos); static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
// Returns the size of the bigram position field that is stored in bigram flags.
// The size is obtained by masking the address type bits out of the flags and shifting them
// down by ATTRIBUTE_ADDRESS_SHIFT.
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
/* Note: this is a value-dependent optimization (it relies on the numeric values of the
address type flags) of what may probably be more readably written this way:
switch (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: return 3;
default: return 0;
}
*/
}
static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
const bool hasNext, const int entryPos);
// Returns a copy of flags whose probability bits are replaced by the low bits of probability
// selected by MASK_ATTRIBUTE_PROBABILITY; all other bits are kept.
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
        const int probability) {
    const BigramFlags flagsWithoutProbability = flags & (~MASK_ATTRIBUTE_PROBABILITY);
    const BigramFlags probabilityBits = probability & MASK_ATTRIBUTE_PROBABILITY;
    return flagsWithoutProbability | probabilityBits;
}
static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
const int targetPos, const int probability, const bool hasNext, int *const writingPos);
static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
const int targetOffset, int *const writingPos);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
@ -83,11 +55,6 @@ private:
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE; static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT; static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY; static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
static const int ATTRIBUTE_ADDRESS_SHIFT;
// Returns true if the bigram entry is valid and put entry flags into out*.
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) { static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0; return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
@ -95,8 +62,6 @@ private:
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf, static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
const BigramFlags flags, int *const pos); const BigramFlags flags, int *const pos);
static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H #endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H

View File

@ -1,92 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
namespace latinime {
// Maximum number of bigram links followed from one starting PtNode. Exceeding it makes
// followBigramLinkAndGetCurrentBigramPtNodePos() report the link chain as invalid.
const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
// NOTE(review): not referenced by any method visible in this file; presumably an upper bound
// on the number of entries in a single bigram list - confirm whether it is still needed.
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
// Reads the bigram entry at *bigramEntryPos and advances *bigramEntryPos past it.
// Outputs:
//   outBigramPos   - position of the currently valid target PtNode after following bigram
//                    links, or NOT_A_DICT_POS when this is a decaying dictionary and the entry's
//                    probability is not a valid encoded probability.
//   outProbability - probability stored in the entry's flags.
//   outHasNext     - whether the flags mark another entry as following this one.
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const bigramEntryPos) const {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
// The raw buffer is indexed from its own start, so temporarily translate the position when
// the entry is stored in the additional buffer.
if (usesAdditionalBuffer) {
*bigramEntryPos -= mBuffer->getOriginalBufferSize();
}
BigramListReadWriteUtils::BigramFlags bigramFlags;
int originalBigramPos;
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
&originalBigramPos, bigramEntryPos);
// The target position was read relative to the additional buffer; translate it back to a
// dictionary-wide position unless it is the "no position" marker.
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
// This bigram is too weak to output.
*outBigramPos = NOT_A_DICT_POS;
} else {
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
}
// Undo the translation applied above so the caller gets a dictionary-wide position.
if (usesAdditionalBuffer) {
*bigramEntryPos += mBuffer->getOriginalBufferSize();
}
}
// Advances *bigramListPos past the bigram list that starts there. Positions located in the
// additional buffer are translated to raw-buffer positions for the skip and translated back
// afterwards.
void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
    const uint8_t *const rawBuffer = mBuffer->getBuffer(inAdditionalBuffer);
    if (!inAdditionalBuffer) {
        // Original buffer: the position can be used as is.
        BigramListReadWriteUtils::skipExistingBigrams(rawBuffer, bigramListPos);
        return;
    }
    *bigramListPos -= mBuffer->getOriginalBufferSize();
    BigramListReadWriteUtils::skipExistingBigrams(rawBuffer, bigramListPos);
    *bigramListPos += mBuffer->getOriginalBufferSize();
}
// Follows the chain of bigram links that starts at originalBigramPos and returns the position of
// the last PtNode in the chain, i.e. the first one whose bigram linked node position is
// NOT_A_DICT_POS.
// Returns NOT_A_DICT_POS when originalBigramPos is NOT_A_DICT_POS, or when more than
// CONTINUING_BIGRAM_LINK_COUNT_LIMIT links would have to be followed (the chain is then logged
// as invalid).
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
        const int originalBigramPos) const {
    if (originalBigramPos == NOT_A_DICT_POS) {
        return NOT_A_DICT_POS;
    }
    DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
    int currentPos = NOT_A_DICT_POS;
    int bigramLinkCount = 0;
    int bigramLinkedNodePos = originalBigramPos;
    do {
        currentPos = bigramLinkedNodePos;
        const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
        bigramLinkCount++;
        if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
            // Guards against link cycles in a broken dictionary.
            AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
            ASSERT(false);
            return NOT_A_DICT_POS;
        }
        // Read the next link exactly once per iteration.
        bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
    } while (bigramLinkedNodePos != NOT_A_DICT_POS);
    return currentPos;
}
} // namespace latinime

View File

@ -1,67 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
namespace latinime {
class BufferWithExtendableBuffer;
class DictionaryHeaderStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
 * This is a dynamic version of BigramListPolicy and supports an additional buffer.
 * Positions passed to and returned from the public methods may fall in the additional buffer
 * of the BufferWithExtendableBuffer given at construction.
 */
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
// Stores the given pointers and flag as-is; nothing is copied.
// NOTE(review): no ownership transfer is visible here - presumably all pointers must outlive
// this object; confirm against the caller.
DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
BufferWithExtendableBuffer *const buffer,
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
const bool isDecayingDict)
: mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {}
// Reads the bigram entry at *bigramEntryPos, fills the out parameters and advances
// *bigramEntryPos past the entry.
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const bigramEntryPos) const;
// Advances *bigramListPos past the whole bigram list that starts there.
void skipAllBigrams(int *const bigramListPos) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
// Maximum number of bigram links followed before a link chain is considered invalid.
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
// NOTE(review): presumably a cap on entries per bigram list; no use is visible in this class.
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
// NOTE(review): stored by the constructor; no read of it is visible in this class.
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
// When true, getNextBigram() hides entries whose probability is not a valid encoded probability.
const bool mIsDecayingDict;
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H

View File

@ -1,123 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
/*
* This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
*/
// Shortcut list policy that can read shortcut lists stored either in the original buffer or in
// the additional buffer of a BufferWithExtendableBuffer. Positions passed in and out of the
// public methods are dictionary-wide; they are translated to raw-buffer positions internally
// whenever they fall in the additional buffer.
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
 public:
    // Stores the buffer pointer as-is; the buffer is not copied.
    explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
            : mBuffer(buffer) {}

    ~DynamicShortcutListPolicy() {}

    // Returns the position of the first shortcut entry of the list at pos, i.e. pos advanced
    // past the list size field. NOT_A_DICT_POS is passed through unchanged.
    int getStartPos(const int pos) const {
        if (pos == NOT_A_DICT_POS) {
            return NOT_A_DICT_POS;
        }
        return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
    }

    // Reads the shortcut entry at *pos and advances *pos.
    // Every out parameter may be null. The shortcut target is only read (and *pos only advanced
    // past it) when outCodePoint is non-null; outCodePointCount then receives the number of
    // code points read, if it is non-null as well.
    void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
            int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
            int *const pos) const {
        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
        const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
        if (usesAdditionalBuffer) {
            *pos -= mBuffer->getOriginalBufferSize();
        }
        const ShortcutListReadingUtils::ShortcutFlags flags =
                ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
        if (outHasNext) {
            *outHasNext = ShortcutListReadingUtils::hasNext(flags);
        }
        if (outIsWhitelist) {
            *outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
        }
        if (outCodePoint) {
            const int codePointCount = ShortcutListReadingUtils::readShortcutTarget(
                    buffer, maxCodePointCount, outCodePoint, pos);
            // Like the other out parameters, the count is optional; do not dereference null.
            if (outCodePointCount) {
                *outCodePointCount = codePointCount;
            }
        }
        if (usesAdditionalBuffer) {
            *pos += mBuffer->getOriginalBufferSize();
        }
    }

    // Advances *pos past the whole shortcut list that starts at *pos.
    void skipAllShortcuts(int *const pos) const {
        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
        const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
        if (usesAdditionalBuffer) {
            *pos -= mBuffer->getOriginalBufferSize();
        }
        const int shortcutListSize = ShortcutListReadingUtils
                ::getShortcutListSizeAndForwardPointer(buffer, pos);
        *pos += shortcutListSize;
        if (usesAdditionalBuffer) {
            *pos += mBuffer->getOriginalBufferSize();
        }
    }

    // Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
    // bufferToWrite and advance these positions after the shortcut lists. This returns whether
    // the copy was succeeded or not.
    // NOTE(review): when a write fails and the list is in the additional buffer, *fromPos is
    // left as a raw-buffer position (it is not translated back before returning false).
    bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
            int *const fromPos, int *const toPos) const {
        const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
        if (usesAdditionalBuffer) {
            *fromPos -= mBuffer->getOriginalBufferSize();
        }
        const int shortcutListSize = ShortcutListReadingUtils
                ::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
                        fromPos);
        // Copy shortcut list size.
        if (!bufferToWrite->writeUintAndAdvancePosition(
                shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
                ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
            return false;
        }
        // Copy shortcut list.
        for (int i = 0; i < shortcutListSize; ++i) {
            const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
                    mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
            if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
                return false;
            }
        }
        if (usesAdditionalBuffer) {
            *fromPos += mBuffer->getOriginalBufferSize();
        }
        return true;
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);

    const BufferWithExtendableBuffer *const mBuffer;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H

View File

@ -20,7 +20,6 @@
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h"
@ -45,9 +44,6 @@ namespace latinime {
case FormatUtils::VERSION_2: case FormatUtils::VERSION_2:
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr( return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
new PatriciaTriePolicy(mmappedBuffer)); new PatriciaTriePolicy(mmappedBuffer));
case FormatUtils::VERSION_3:
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
new DynamicPatriciaTriePolicy(mmappedBuffer));
case FormatUtils::VERSION_4: { case FormatUtils::VERSION_4: {
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */; const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
char dictDirPath[dictDirPathBufSize]; char dictDirPath[dictDirPathBufSize];

View File

@ -53,23 +53,6 @@ class PtNodeParams {
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount); memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
} }
// PtNode without terminal id.
// Initializes every field from the corresponding argument, except the terminal id fields, which
// are set to NOT_A_DICT_POS and Ver4DictConstants::NOT_A_TERMINAL_ID. The code points are
// copied into mCodePoints.
// NOTE(review): codePointCount is not range-checked here; presumably callers guarantee it fits
// the capacity of mCodePoints - confirm.
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
const int parentPos, const int codePointCount, const int *const codePoints,
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
const int bigramPos, const int siblingPos)
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
mCodePointCount(codePointCount), mCodePoints(),
mTerminalIdFieldPos(NOT_A_DICT_POS),
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
mBigramPos(bigramPos), mSiblingPos(siblingPos) {
// mCodePoints() above value-initializes the array; only the first mCodePointCount entries
// are then overwritten.
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
}
// PtNode with a terminal id. // PtNode with a terminal id.
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags, PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
const int parentPos, const int codePointCount, const int *const codePoints, const int parentPos, const int codePointCount, const int *const codePoints,

View File

@ -1,107 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
// Reads the PtNode at ptNodePos and returns its attributes. When the node is flagged as moved,
// the destination node is read instead (recursively) and its attributes are returned.
//   ptNodePos           - dictionary-wide position of the PtNode to read.
//   siblingNodePos      - sibling position to report, or NOT_A_DICT_POS to use the tail position
//                         of the node read by this call.
//   bigramLinkedNodePos - bigram linked node position to report; replaced by this node's children
//                         position when siblingNodePos is NOT_A_DICT_POS and the node is moved.
// Returns a default-constructed PtNodeParams when ptNodePos is outside [0, tail position).
// NOTE(review): no recursion depth limit is visible here; a cycle of moved nodes in a broken
// dictionary would presumably recurse without bound - confirm.
const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
ptNodePos, mBuffer->getTailPosition());
ASSERT(false);
return PtNodeParams();
}
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
// pos is the reading cursor into dictBuf; headPos keeps the dictionary-wide node position.
int pos = ptNodePos;
const int headPos = ptNodePos;
if (usesAdditionalBuffer) {
pos -= mBuffer->getOriginalBufferSize();
}
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
const int parentPosOffset =
DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(dictBuf,
&pos);
const int parentPos =
DynamicPatriciaTrieReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
int codePoints[MAX_WORD_LENGTH];
const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
// The probability field exists only for terminal nodes.
int probability = NOT_A_PROBABILITY;
int probabilityFieldPos = NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
probabilityFieldPos = pos;
// Field positions are reported as dictionary-wide positions.
if (usesAdditionalBuffer) {
probabilityFieldPos += mBuffer->getOriginalBufferSize();
}
probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos);
}
int childrenPosFieldPos = pos;
if (usesAdditionalBuffer) {
childrenPosFieldPos += mBuffer->getOriginalBufferSize();
}
int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
dictBuf, &pos);
// The children position was read relative to dictBuf; convert it unless it is the
// "no position" marker.
if (usesAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
childrenPos += mBuffer->getOriginalBufferSize();
}
// When no sibling position was supplied and this node is moved, the children position read
// above is reported as the bigram linked node position.
int newBigramLinkedNodePos = bigramLinkedNodePos;
if (siblingNodePos == NOT_A_DICT_POS) {
if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
newBigramLinkedNodePos = childrenPos;
}
}
// From here on pos is a dictionary-wide position, which is what the shortcut and bigram
// policies are given.
if (usesAdditionalBuffer) {
pos += mBuffer->getOriginalBufferSize();
}
int shortcutsPos = NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
shortcutsPos = pos;
mShortcutsPolicy->skipAllShortcuts(&pos);
}
int bigramsPos = NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
bigramsPos = pos;
mBigramsPolicy->skipAllBigrams(&pos);
}
int newSiblingNodePos = siblingNodePos;
if (siblingNodePos == NOT_A_DICT_POS) {
// Sibling position is the tail position of current node.
newSiblingNodePos = pos;
}
// Read destination node if the read node is a moved node.
if (DynamicPatriciaTrieReadingUtils::isMoved(flags)) {
// The destination position is stored at the same place as the parent position.
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos,
newBigramLinkedNodePos);
} else {
return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
newBigramLinkedNodePos, shortcutsPos, bigramsPos, newSiblingNodePos);
}
}
}

View File

@ -1,62 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
#include <stdint.h>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
namespace latinime {
class BufferWithExtendableBuffer;
class DictionaryBigramsStructurePolicy;
class DictionaryShortcutsStructurePolicy;
/*
 * This class helps to read the nodes of a dynamic patricia trie. It handles moved nodes and
 * reads node attributes.
 */
class DynamicPatriciaTrieNodeReader : public PtNodeReader {
public:
// Stores the given pointers as-is; nothing is copied.
// NOTE(review): presumably buffer and both policies must outlive this reader - confirm.
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
mShortcutsPolicy(shortcutsPolicy) {}
~DynamicPatriciaTrieNodeReader() {}
// Reads the PtNode at ptNodePos, starting without a known sibling position or bigram linked
// node position.
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
}
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
const BufferWithExtendableBuffer *const mBuffer;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
// Reads the PtNode at ptNodePos, carrying the sibling and bigram linked node positions gathered
// so far along when the node turns out to be a moved node.
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
const int siblingNodePos, const int bigramLinkedNodePos) const;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */

View File

@ -1,129 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
#include <cstdio>
#include <cstring>
#include <ctime>
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
while (!readingHelper.isEnd()) {
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
if (!ptNodeParams.isValid()) {
break;
}
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
// valid terminal DicNode.
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
!= NOT_A_PROBABILITY;
}
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
readingHelper.readNextSiblingNode(ptNodeParams);
}
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodePos(ptNodePos);
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
outCodePoints, outUnigramProbability);
}
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
}
// Combines a unigram and a bigram probability into a single probability.
// Decaying dictionaries delegate to ForgettingCurveUtils. Otherwise a missing unigram yields
// NOT_A_PROBABILITY, a missing bigram yields the backed-off unigram probability, and when both
// are present the bigram-aware probability is computed.
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
        const int bigramProbability) const {
    if (mHeaderPolicy.isDecayingDict()) {
        return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
    }
    if (unigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    }
    if (bigramProbability == NOT_A_PROBABILITY) {
        return ProbabilityUtils::backoff(unigramProbability);
    }
    return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, bigramProbability);
}
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_PROBABILITY;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
return NOT_A_PROBABILITY;
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
return ptNodeParams.getShortcutPos();
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS;
}
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
if (ptNodeParams.isDeleted()) {
return NOT_A_DICT_POS;
}
return ptNodeParams.getBigramsPos();
}
} // namespace latinime

View File

@ -1,140 +0,0 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
namespace latinime {
class DicNode;
class DicNodeVector;
// Dictionary structure policy whose header is parsed as FormatUtils::VERSION_3.
// The lookup methods are only declared in this class body. Every updating or maintenance
// method (add/remove words, flush, GC) is a stub that logs a warning and reports failure,
// because this policy is used for non-updatable dictionaries.
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
 public:
    // Builds the policy on top of mmappedBuffer.
    // The constructor is explicit so that a MmappedBufferPtr is never silently converted into
    // a policy object.
    // The trie body handed to mBufferWithExtendableBuffer starts right after the header: the
    // buffer pointer is advanced by mHeaderPolicy.getSize() and the size is reduced by the
    // same amount.
    // NOTE: the initializers below depend on earlier members (mHeaderPolicy is read, and
    // pointers to mBufferWithExtendableBuffer, mShortcutListPolicy and mBigramListPolicy are
    // passed on), so the member declaration order at the bottom of the class must be kept.
    explicit DynamicPatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
            : mMmappedBuffer(mmappedBuffer),
              mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_3),
              mBufferWithExtendableBuffer(mMmappedBuffer.get()->getBuffer()
                      + mHeaderPolicy.getSize(), mMmappedBuffer.get()->getBufferSize()
                              - mHeaderPolicy.getSize(),
                      BufferWithExtendableBuffer
                              ::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
              mShortcutListPolicy(&mBufferWithExtendableBuffer),
              mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
                      mHeaderPolicy.isDecayingDict()),
              mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy) {}

    // The root PtNode array is always at position 0 of the trie body.
    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
    }

    void createAndGetAllChildDicNodes(const DicNode *const dicNode,
            DicNodeVector *const childDicNodes) const;

    int getCodePointsAndProbabilityAndReturnCodePointCount(
            const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
            int *const outUnigramProbability) const;

    int getTerminalPtNodePositionOfWord(const int *const inWord,
            const int length, const bool forceLowerCaseSearch) const;

    int getProbability(const int unigramProbability, const int bigramProbability) const;

    int getUnigramProbabilityOfPtNode(const int ptNodePos) const;

    int getShortcutPositionOfPtNode(const int ptNodePos) const;

    int getBigramsPositionOfPtNode(const int ptNodePos) const;

    // The accessors below expose the sub-policies owned by this object. The returned pointers
    // refer to members, so they are only usable while this object exists.
    const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
        return &mHeaderPolicy;
    }

    const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
        return &mBigramListPolicy;
    }

    const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
        return &mShortcutListPolicy;
    }

    // Unsupported: logs a warning and returns false without touching the dictionary.
    bool addUnigramWord(const int *const word, const int length, const int probability,
            const int timestamp) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false without touching the dictionary.
    bool addBigramWords(const int *const word0, const int length0, const int *const word1,
            const int length1, const int probability, const int timestamp) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning and returns false without touching the dictionary.
    bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
            const int length1) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
        return false;
    }

    // Unsupported: logs a warning; nothing is written to filePath.
    void flush(const char *const filePath) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: flush() is called for non-updatable dictionary.");
    }

    // Unsupported: logs a warning; nothing is written to filePath.
    void flushWithGC(const char *const filePath) {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    }

    // Unsupported: logs a warning and always answers false.
    bool needsToRunGC(const bool mindsBlockByGC) const {
        // This method should not be called for non-updatable dictionary.
        AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
        return false;
    }

    // Property queries are not supported. The result is set to an empty string whenever the
    // output buffer has room for at least the terminating '\0'.
    void getProperty(const char *const query, const int queryLength, char *const outResult,
            const int maxResultLength) {
        // getProperty is not supported for this class.
        if (maxResultLength > 0) {
            outResult[0] = '\0';
        }
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);

    // Declaration order matters: it is the initialization order the constructor relies on.
    // Handle to the mapped dictionary buffer that all other members read from
    // (presumably also keeps the mapping alive -- confirm against MmappedBufferPtr).
    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    const HeaderPolicy mHeaderPolicy;
    BufferWithExtendableBuffer mBufferWithExtendableBuffer;
    DynamicShortcutListPolicy mShortcutListPolicy;
    DynamicBigramListPolicy mBigramListPolicy;
    DynamicPatriciaTrieNodeReader mNodeReader;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -17,7 +17,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"