Merge "Purge ver3 dictionary reading in native code."
commit
cf66bb8cbc
|
@ -81,8 +81,6 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
patricia_trie_reading_utils.cpp) \
|
patricia_trie_reading_utils.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v3/, \
|
||||||
dynamic_patricia_trie_gc_event_listeners.cpp \
|
dynamic_patricia_trie_gc_event_listeners.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
|
||||||
dynamic_patricia_trie_policy.cpp \
|
|
||||||
dynamic_patricia_trie_reading_helper.cpp \
|
dynamic_patricia_trie_reading_helper.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
dynamic_patricia_trie_updating_helper.cpp \
|
dynamic_patricia_trie_updating_helper.cpp \
|
||||||
|
|
|
@ -38,7 +38,6 @@ const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRI
|
||||||
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
// Mask for attribute probability, stored on 4 bits inside the flags byte.
|
||||||
const BigramListReadWriteUtils::BigramFlags
|
const BigramListReadWriteUtils::BigramFlags
|
||||||
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
|
||||||
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
|
||||||
|
|
||||||
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
/* static */ void BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(
|
||||||
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
const uint8_t *const bigramsBuf, BigramFlags *const outBigramFlags,
|
||||||
|
@ -91,92 +90,4 @@ const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rewrites the flags byte of the bigram entry at entryPos so that its "has next" bit matches
// hasNext. entryPos is a position in the whole buffer; when it lies in the additional buffer the
// read position is rebased by the original buffer size before the flags byte is read.
// Returns the result of writing the updated flags byte back at entryPos.
/* static */ bool BigramListReadWriteUtils::setHasNextFlag(
        BufferWithExtendableBuffer *const buffer, const bool hasNext, const int entryPos) {
    const bool inAdditionalBuffer = buffer->isInAdditionalBuffer(entryPos);
    int flagsReadPos = inAdditionalBuffer
            ? entryPos - buffer->getOriginalBufferSize() : entryPos;
    const BigramFlags currentFlags = ByteArrayUtils::readUint8AndAdvancePosition(
            buffer->getBuffer(inAdditionalBuffer), &flagsReadPos);
    const BigramFlags updatedFlags = hasNext
            ? (currentFlags | FLAG_ATTRIBUTE_HAS_NEXT)
            : (currentFlags & (~FLAG_ATTRIBUTE_HAS_NEXT));
    // The write goes through the buffer wrapper, which takes the un-rebased position.
    int flagsWritePos = entryPos;
    return buffer->writeUintAndAdvancePosition(updatedFlags, 1 /* size */, &flagsWritePos);
}
|
|
||||||
|
|
||||||
// Builds the flags for a new bigram entry located at *writingPos and writes the entry (flags
// plus target field) into buffer, advancing *writingPos. Returns false if the flags cannot be
// created or if writing fails.
/* static */ bool BigramListReadWriteUtils::createAndWriteBigramEntry(
        BufferWithExtendableBuffer *const buffer, const int targetPos, const int probability,
        const bool hasNext, int *const writingPos) {
    BigramFlags entryFlags;
    const bool flagsCreated =
            createAndGetBigramFlags(*writingPos, targetPos, probability, hasNext, &entryFlags);
    // Short-circuit keeps the original behavior: nothing is written when flag creation fails.
    return flagsCreated && writeBigramEntry(buffer, entryFlags, targetPos, writingPos);
}
|
|
||||||
|
|
||||||
// Writes one bigram entry at *writingPos: first the flags byte, then the absolute value of the
// offset to targetPtNodePos. The "offset negative" bit of the written flags is recomputed from
// the sign of the offset; the width of the target field is taken from the address-type bits of
// the flags that were passed in. *writingPos is advanced past whatever was written.
/* static */ bool BigramListReadWriteUtils::writeBigramEntry(
        BufferWithExtendableBuffer *const bufferToWrite, const BigramFlags flags,
        const int targetPtNodePos, int *const writingPos) {
    // The offset must be computed before the flags byte is written, because the write
    // advances *writingPos.
    const int targetOffset = getBigramTargetOffset(targetPtNodePos, *writingPos);
    BigramFlags flagsWithSign = flags;
    if (targetOffset < 0) {
        flagsWithSign |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
    } else {
        flagsWithSign &= ~FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
    }
    if (!bufferToWrite->writeUintAndAdvancePosition(flagsWithSign, 1 /* size */, writingPos)) {
        return false;
    }
    const uint32_t offsetMagnitude = abs(targetOffset);
    const int targetFieldSize = attributeAddressSize(flags);
    return bufferToWrite->writeUintAndAdvancePosition(offsetMagnitude, targetFieldSize,
            writingPos);
}
|
|
||||||
|
|
||||||
// Returns true if the bigram entry is valid and put entry flags into out*.
|
|
||||||
/* static */ bool BigramListReadWriteUtils::createAndGetBigramFlags(const int entryPos,
|
|
||||||
const int targetPtNodePos, const int probability, const bool hasNext,
|
|
||||||
BigramFlags *const outBigramFlags) {
|
|
||||||
BigramFlags flags = probability & MASK_ATTRIBUTE_PROBABILITY;
|
|
||||||
if (hasNext) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_HAS_NEXT;
|
|
||||||
}
|
|
||||||
const int offset = getBigramTargetOffset(targetPtNodePos, entryPos);
|
|
||||||
if (offset < 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
|
||||||
}
|
|
||||||
const uint32_t absOffest = abs(offset);
|
|
||||||
if ((absOffest >> 24) != 0) {
|
|
||||||
// Offset is too large.
|
|
||||||
return false;
|
|
||||||
} else if ((absOffest >> 16) != 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
|
||||||
} else if ((absOffest >> 8) != 0) {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
|
||||||
} else {
|
|
||||||
flags |= FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
|
||||||
}
|
|
||||||
// Currently, all newly written bigram position fields are 3 bytes to simplify dictionary
|
|
||||||
// writing.
|
|
||||||
// TODO: Remove following 2 lines and optimize memory space.
|
|
||||||
flags = (flags & (~MASK_ATTRIBUTE_ADDRESS_TYPE)) | FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
|
||||||
*outBigramFlags = flags;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */ int BigramListReadWriteUtils::getBigramTargetOffset(const int targetPtNodePos,
|
|
||||||
const int entryPos) {
|
|
||||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
|
||||||
return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_INVALID;
|
|
||||||
} else {
|
|
||||||
const int offset = targetPtNodePos - (entryPos + 1 /* bigramFlagsField */);
|
|
||||||
if (offset == 0) {
|
|
||||||
return DynamicPatriciaTrieReadingUtils::DICT_OFFSET_ZERO_OFFSET;
|
|
||||||
} else {
|
|
||||||
return offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -45,34 +45,6 @@ public:
|
||||||
// Bigrams reading methods
|
// Bigrams reading methods
|
||||||
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
static void skipExistingBigrams(const uint8_t *const bigramsBuf, int *const bigramListPos);
|
||||||
|
|
||||||
// Returns the size of the bigram position field that is stored in bigram flags.
|
|
||||||
static AK_FORCE_INLINE int attributeAddressSize(const BigramFlags flags) {
|
|
||||||
return (flags & MASK_ATTRIBUTE_ADDRESS_TYPE) >> ATTRIBUTE_ADDRESS_SHIFT;
|
|
||||||
/* Note: this is a value-dependant optimization of what may probably be
|
|
||||||
more readably written this way:
|
|
||||||
switch (flags * BinaryFormat::MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: return 1;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: return 2;
|
|
||||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTE: return 3;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool setHasNextFlag(BufferWithExtendableBuffer *const buffer,
|
|
||||||
const bool hasNext, const int entryPos);
|
|
||||||
|
|
||||||
// Returns a copy of flags whose probability bits (MASK_ATTRIBUTE_PROBABILITY) are replaced by
// the corresponding bits of probability; all other bits of flags are kept.
static AK_FORCE_INLINE BigramFlags setProbabilityInFlags(const BigramFlags flags,
        const int probability) {
    const BigramFlags flagsWithoutProbability = flags & (~MASK_ATTRIBUTE_PROBABILITY);
    const BigramFlags probabilityBits = probability & MASK_ATTRIBUTE_PROBABILITY;
    return flagsWithoutProbability | probabilityBits;
}
|
|
||||||
|
|
||||||
static bool createAndWriteBigramEntry(BufferWithExtendableBuffer *const buffer,
|
|
||||||
const int targetPos, const int probability, const bool hasNext, int *const writingPos);
|
|
||||||
|
|
||||||
// Writes a bigram entry (flags byte followed by the target field) at *writingPos and advances
// *writingPos. targetPtNodePos is the position of the target PtNode, not an offset: the
// definition derives the offset to store from it and from *writingPos.
static bool writeBigramEntry(BufferWithExtendableBuffer *const buffer, const BigramFlags flags,
        const int targetPtNodePos, int *const writingPos);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListReadWriteUtils);
|
||||||
|
|
||||||
|
@ -83,11 +55,6 @@ private:
|
||||||
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
static const BigramFlags FLAG_ATTRIBUTE_OFFSET_NEGATIVE;
|
||||||
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
|
static const BigramFlags FLAG_ATTRIBUTE_HAS_NEXT;
|
||||||
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
|
static const BigramFlags MASK_ATTRIBUTE_PROBABILITY;
|
||||||
static const int ATTRIBUTE_ADDRESS_SHIFT;
|
|
||||||
|
|
||||||
// Returns true if the bigram entry is valid, and stores the entry flags into outBigramFlags.
|
|
||||||
static bool createAndGetBigramFlags(const int entryPos, const int targetPos,
|
|
||||||
const int probability, const bool hasNext, BigramFlags *const outBigramFlags);
|
|
||||||
|
|
||||||
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
static AK_FORCE_INLINE bool isOffsetNegative(const BigramFlags flags) {
|
||||||
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
|
||||||
|
@ -95,8 +62,6 @@ private:
|
||||||
|
|
||||||
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
|
static int getBigramAddressAndAdvancePosition(const uint8_t *const bigramsBuf,
|
||||||
const BigramFlags flags, int *const pos);
|
const BigramFlags flags, int *const pos);
|
||||||
|
|
||||||
static int getBigramTargetOffset(const int targetPtNodePos, const int entryPos);
|
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
#endif // LATINIME_BIGRAM_LIST_READ_WRITE_UTILS_H
|
||||||
|
|
|
@ -1,92 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
// Maximum number of bigram links followed while resolving a single bigram target. Going past
// this count is logged as an invalid bigram link and the lookup is abandoned.
const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000;
|
|
||||||
// Limit on the number of bigram entries in one bigram list, going by its name.
// NOTE(review): no use of this constant is visible in this file - confirm where it is enforced.
const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000;
|
|
||||||
|
|
||||||
void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
|
|
||||||
bool *const outHasNext, int *const bigramEntryPos) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramEntryPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::BigramFlags bigramFlags;
|
|
||||||
int originalBigramPos;
|
|
||||||
BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags,
|
|
||||||
&originalBigramPos, bigramEntryPos);
|
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
|
||||||
if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
|
|
||||||
// This bigram is too weak to output.
|
|
||||||
*outBigramPos = NOT_A_DICT_POS;
|
|
||||||
} else {
|
|
||||||
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramEntryPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*bigramListPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
|
||||||
const int originalBigramPos) const {
|
|
||||||
if (originalBigramPos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy);
|
|
||||||
int currentPos = NOT_A_DICT_POS;
|
|
||||||
int bigramLinkCount = 0;
|
|
||||||
int bigramLinkedNodePos = originalBigramPos;
|
|
||||||
do {
|
|
||||||
currentPos = bigramLinkedNodePos;
|
|
||||||
const PtNodeParams ptNodeParams(nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos));
|
|
||||||
bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
|
|
||||||
bigramLinkCount++;
|
|
||||||
if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) {
|
|
||||||
AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos);
|
|
||||||
ASSERT(false);
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
bigramLinkedNodePos = ptNodeParams.getBigramLinkedNodePos();
|
|
||||||
} while (bigramLinkedNodePos != NOT_A_DICT_POS);
|
|
||||||
return currentPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
|
@ -1,67 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
|
||||||
#define LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_gc_event_listeners.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
|
||||||
class DictionaryHeaderStructurePolicy;
|
|
||||||
class DictionaryShortcutsStructurePolicy;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a dynamic version of BigramListPolicy and supports an additional buffer.
|
|
||||||
*/
|
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
|
||||||
public:
|
|
||||||
DynamicBigramListPolicy(const DictionaryHeaderStructurePolicy *const headerPolicy,
|
|
||||||
BufferWithExtendableBuffer *const buffer,
|
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
|
||||||
const bool isDecayingDict)
|
|
||||||
: mHeaderPolicy(headerPolicy), mBuffer(buffer), mShortcutPolicy(shortcutPolicy),
|
|
||||||
mIsDecayingDict(isDecayingDict) {}
|
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
|
||||||
|
|
||||||
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
|
||||||
int *const bigramEntryPos) const;
|
|
||||||
|
|
||||||
void skipAllBigrams(int *const bigramListPos) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicBigramListPolicy);
|
|
||||||
|
|
||||||
static const int CONTINUING_BIGRAM_LINK_COUNT_LIMIT;
|
|
||||||
static const int BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT;
|
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *const mHeaderPolicy;
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
|
||||||
const bool mIsDecayingDict;
|
|
||||||
|
|
||||||
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
|
||||||
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
|
|
@ -1,123 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
|
|
||||||
#define LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is a dynamic version of ShortcutListPolicy and supports an additional buffer.
|
|
||||||
*/
|
|
||||||
class DynamicShortcutListPolicy : public DictionaryShortcutsStructurePolicy {
|
|
||||||
public:
|
|
||||||
explicit DynamicShortcutListPolicy(const BufferWithExtendableBuffer *const buffer)
|
|
||||||
: mBuffer(buffer) {}
|
|
||||||
|
|
||||||
~DynamicShortcutListPolicy() {}
|
|
||||||
|
|
||||||
int getStartPos(const int pos) const {
|
|
||||||
if (pos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return pos + ShortcutListReadingUtils::getShortcutListSizeFieldSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
void getNextShortcut(const int maxCodePointCount, int *const outCodePoint,
|
|
||||||
int *const outCodePointCount, bool *const outIsWhitelist, bool *const outHasNext,
|
|
||||||
int *const pos) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const ShortcutListReadingUtils::ShortcutFlags flags =
|
|
||||||
ShortcutListReadingUtils::getFlagsAndForwardPointer(buffer, pos);
|
|
||||||
if (outHasNext) {
|
|
||||||
*outHasNext = ShortcutListReadingUtils::hasNext(flags);
|
|
||||||
}
|
|
||||||
if (outIsWhitelist) {
|
|
||||||
*outIsWhitelist = ShortcutListReadingUtils::isWhitelist(flags);
|
|
||||||
}
|
|
||||||
if (outCodePoint) {
|
|
||||||
*outCodePointCount = ShortcutListReadingUtils::readShortcutTarget(
|
|
||||||
buffer, maxCodePointCount, outCodePoint, pos);
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void skipAllShortcuts(int *const pos) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*pos);
|
|
||||||
const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const int shortcutListSize = ShortcutListReadingUtils
|
|
||||||
::getShortcutListSizeAndForwardPointer(buffer, pos);
|
|
||||||
*pos += shortcutListSize;
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*pos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy shortcuts from the shortcut list that starts at fromPos in mBuffer to toPos in
|
|
||||||
// bufferToWrite and advance these positions after the shortcut lists. This returns whether
|
|
||||||
// the copy was succeeded or not.
|
|
||||||
bool copyAllShortcutsAndReturnIfSucceededOrNot(BufferWithExtendableBuffer *const bufferToWrite,
|
|
||||||
int *const fromPos, int *const toPos) const {
|
|
||||||
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos);
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*fromPos -= mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
const int shortcutListSize = ShortcutListReadingUtils
|
|
||||||
::getShortcutListSizeAndForwardPointer(mBuffer->getBuffer(usesAdditionalBuffer),
|
|
||||||
fromPos);
|
|
||||||
// Copy shortcut list size.
|
|
||||||
if (!bufferToWrite->writeUintAndAdvancePosition(
|
|
||||||
shortcutListSize + ShortcutListReadingUtils::getShortcutListSizeFieldSize(),
|
|
||||||
ShortcutListReadingUtils::getShortcutListSizeFieldSize(), toPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Copy shortcut list.
|
|
||||||
for (int i = 0; i < shortcutListSize; ++i) {
|
|
||||||
const uint8_t data = ByteArrayUtils::readUint8AndAdvancePosition(
|
|
||||||
mBuffer->getBuffer(usesAdditionalBuffer), fromPos);
|
|
||||||
if (!bufferToWrite->writeUintAndAdvancePosition(data, 1 /* size */, toPos)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (usesAdditionalBuffer) {
|
|
||||||
*fromPos += mBuffer->getOriginalBufferSize();
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicShortcutListPolicy);
|
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_DYNAMIC_SHORTCUT_LIST_POLICY_H
|
|
|
@ -20,7 +20,6 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||||
|
@ -45,9 +44,6 @@ namespace latinime {
|
||||||
case FormatUtils::VERSION_2:
|
case FormatUtils::VERSION_2:
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
|
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
|
||||||
new PatriciaTriePolicy(mmappedBuffer));
|
new PatriciaTriePolicy(mmappedBuffer));
|
||||||
case FormatUtils::VERSION_3:
|
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
|
|
||||||
new DynamicPatriciaTriePolicy(mmappedBuffer));
|
|
||||||
case FormatUtils::VERSION_4: {
|
case FormatUtils::VERSION_4: {
|
||||||
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
|
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
|
||||||
char dictDirPath[dictDirPathBufSize];
|
char dictDirPath[dictDirPathBufSize];
|
||||||
|
|
|
@ -53,23 +53,6 @@ class PtNodeParams {
|
||||||
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PtNode without terminal id.
|
|
||||||
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
|
||||||
const int probabilityFieldPos, const int probability, const int childrenPosFieldPos,
|
|
||||||
const int childrenPos, const int bigramLinkedNodePos, const int shortcutPos,
|
|
||||||
const int bigramPos, const int siblingPos)
|
|
||||||
: mHeadPos(headPos), mFlags(flags), mParentPos(parentPos),
|
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
|
||||||
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
|
||||||
mProbabilityFieldPos(probabilityFieldPos), mProbability(probability),
|
|
||||||
mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
|
|
||||||
mBigramLinkedNodePos(bigramLinkedNodePos), mShortcutPos(shortcutPos),
|
|
||||||
mBigramPos(bigramPos), mSiblingPos(siblingPos) {
|
|
||||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
|
||||||
}
|
|
||||||
|
|
||||||
// PtNode with a terminal id.
|
// PtNode with a terminal id.
|
||||||
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||||
|
|
|
@ -1,107 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
// Reads the PtNode stored at ptNodePos and returns its parameters.
// If the node read is flagged as moved, the destination node is read instead (recursively),
// carrying over the sibling position and bigram linked node position determined here.
// siblingNodePos and bigramLinkedNodePos are the values carried over from a previously read
// moved node; when siblingNodePos is NOT_A_DICT_POS, the sibling position is the tail position
// of the node read here.
// Returns a default-constructed PtNodeParams when ptNodePos is outside the buffer.
const PtNodeParams DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
        const int ptNodePos, const int siblingNodePos, const int bigramLinkedNodePos) const {
    if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
        // Reading invalid position because of bug or broken dictionary.
        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
                ptNodePos, mBuffer->getTailPosition());
        ASSERT(false);
        return PtNodeParams();
    }
    const bool inAdditionalBuffer = mBuffer->isInAdditionalBuffer(ptNodePos);
    const uint8_t *const nodeBuf = mBuffer->getBuffer(inAdditionalBuffer);
    const int headPos = ptNodePos;
    // Reading cursor, relative to nodeBuf. Field positions reported to the caller are converted
    // back to positions in the whole buffer.
    int cursor = inAdditionalBuffer ? ptNodePos - mBuffer->getOriginalBufferSize() : ptNodePos;

    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(nodeBuf, &cursor);
    const int parentPos = DynamicPatriciaTrieReadingUtils::getParentPtNodePos(
            DynamicPatriciaTrieReadingUtils::getParentPtNodePosOffsetAndAdvancePosition(nodeBuf,
                    &cursor),
            headPos);
    int codePoints[MAX_WORD_LENGTH];
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            nodeBuf, flags, MAX_WORD_LENGTH, codePoints, &cursor);

    // The probability field only exists for terminal nodes.
    int probabilityFieldPos = NOT_A_DICT_POS;
    int probability = NOT_A_PROBABILITY;
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        probabilityFieldPos =
                inAdditionalBuffer ? cursor + mBuffer->getOriginalBufferSize() : cursor;
        probability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(nodeBuf,
                &cursor);
    }

    const int childrenPosFieldPos =
            inAdditionalBuffer ? cursor + mBuffer->getOriginalBufferSize() : cursor;
    int childrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
            nodeBuf, &cursor);
    if (inAdditionalBuffer && childrenPos != NOT_A_DICT_POS) {
        childrenPos += mBuffer->getOriginalBufferSize();
    }

    const bool isMovedNode = DynamicPatriciaTrieReadingUtils::isMoved(flags);
    // For the first node read (no sibling position carried over yet), a moved node supplies the
    // bigram linked node position through its children position field.
    const int resolvedBigramLinkedNodePos =
            (siblingNodePos == NOT_A_DICT_POS && isMovedNode) ? childrenPos : bigramLinkedNodePos;

    // The shortcut and bigram policies take positions in the whole buffer.
    if (inAdditionalBuffer) {
        cursor += mBuffer->getOriginalBufferSize();
    }
    int shortcutsPos = NOT_A_DICT_POS;
    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        shortcutsPos = cursor;
        mShortcutsPolicy->skipAllShortcuts(&cursor);
    }
    int bigramsPos = NOT_A_DICT_POS;
    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
        bigramsPos = cursor;
        mBigramsPolicy->skipAllBigrams(&cursor);
    }

    // Sibling position is the tail position of current node unless one was carried over.
    const int resolvedSiblingNodePos =
            (siblingNodePos == NOT_A_DICT_POS) ? cursor : siblingNodePos;

    if (isMovedNode) {
        // Read destination node if the read node is a moved node.
        // The destination position is stored at the same place as the parent position.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, resolvedSiblingNodePos,
                resolvedBigramLinkedNodePos);
    }
    return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
            probabilityFieldPos, probability, childrenPosFieldPos, childrenPos,
            resolvedBigramLinkedNodePos, shortcutsPos, bigramsPos, resolvedSiblingNodePos);
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,62 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
|
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class BufferWithExtendableBuffer;
|
|
||||||
class DictionaryBigramsStructurePolicy;
|
|
||||||
class DictionaryShortcutsStructurePolicy;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
|
||||||
* node and reads node attributes.
|
|
||||||
*/
|
|
||||||
class DynamicPatriciaTrieNodeReader : public PtNodeReader {
|
|
||||||
public:
|
|
||||||
DynamicPatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
|
|
||||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy,
|
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutsPolicy)
|
|
||||||
: mBuffer(buffer), mBigramsPolicy(bigramsPolicy),
|
|
||||||
mShortcutsPolicy(shortcutsPolicy) {}
|
|
||||||
|
|
||||||
~DynamicPatriciaTrieNodeReader() {}
|
|
||||||
|
|
||||||
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const {
|
|
||||||
return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(ptNodePos,
|
|
||||||
NOT_A_DICT_POS /* siblingNodePos */, NOT_A_DICT_POS /* bigramLinkedNodePos */);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
|
|
||||||
|
|
||||||
const BufferWithExtendableBuffer *const mBuffer;
|
|
||||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy;
|
|
||||||
|
|
||||||
const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
|
|
||||||
const int siblingNodePos, const int bigramLinkedNodePos) const;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H */
|
|
|
@ -1,129 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cstring>
|
|
||||||
#include <ctime>
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_helper.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_reading_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
|
||||||
DicNodeVector *const childDicNodes) const {
|
|
||||||
if (!dicNode->hasChildren()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
|
||||||
readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
|
|
||||||
while (!readingHelper.isEnd()) {
|
|
||||||
const PtNodeParams ptNodeParams(readingHelper.getPtNodeParams());
|
|
||||||
if (!ptNodeParams.isValid()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
|
|
||||||
if (isTerminal && mHeaderPolicy.isDecayingDict()) {
|
|
||||||
// A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
|
|
||||||
// probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
|
|
||||||
// valid terminal DicNode.
|
|
||||||
isTerminal = getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY)
|
|
||||||
!= NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
|
|
||||||
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
|
|
||||||
ptNodeParams.hasChildren(),
|
|
||||||
ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord(),
|
|
||||||
ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
|
|
||||||
readingHelper.readNextSiblingNode(ptNodeParams);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|
||||||
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
|
|
||||||
int *const outUnigramProbability) const {
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
|
||||||
readingHelper.initWithPtNodePos(ptNodePos);
|
|
||||||
return readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(maxCodePointCount,
|
|
||||||
outCodePoints, outUnigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer, &mNodeReader);
|
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
|
||||||
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
|
||||||
const int bigramProbability) const {
|
|
||||||
if (mHeaderPolicy.isDecayingDict()) {
|
|
||||||
return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
|
|
||||||
} else {
|
|
||||||
if (unigramProbability == NOT_A_PROBABILITY) {
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
|
||||||
return ProbabilityUtils::backoff(unigramProbability);
|
|
||||||
} else {
|
|
||||||
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
|
||||||
bigramProbability);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const {
|
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
|
||||||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
|
||||||
if (ptNodeParams.isDeleted()) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return ptNodeParams.getShortcutPos();
|
|
||||||
}
|
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
const PtNodeParams ptNodeParams(mNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos));
|
|
||||||
if (ptNodeParams.isDeleted()) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return ptNodeParams.getBigramsPos();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace latinime
|
|
|
@ -1,140 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013, The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_node_reader.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class DicNode;
|
|
||||||
class DicNodeVector;
|
|
||||||
|
|
||||||
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|
||||||
public:
|
|
||||||
DynamicPatriciaTriePolicy(const MmappedBuffer::MmappedBufferPtr &mmappedBuffer)
|
|
||||||
: mMmappedBuffer(mmappedBuffer),
|
|
||||||
mHeaderPolicy(mMmappedBuffer.get()->getBuffer(), FormatUtils::VERSION_3),
|
|
||||||
mBufferWithExtendableBuffer(mMmappedBuffer.get()->getBuffer()
|
|
||||||
+ mHeaderPolicy.getSize(), mMmappedBuffer.get()->getBufferSize()
|
|
||||||
- mHeaderPolicy.getSize(),
|
|
||||||
BufferWithExtendableBuffer
|
|
||||||
::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE),
|
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
|
||||||
mBigramListPolicy(&mHeaderPolicy, &mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
|
||||||
mHeaderPolicy.isDecayingDict()),
|
|
||||||
mNodeReader(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy) {}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void createAndGetAllChildDicNodes(const DicNode *const dicNode,
|
|
||||||
DicNodeVector *const childDicNodes) const;
|
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
|
||||||
const int terminalPtNodePos, const int maxCodePointCount, int *const outCodePoints,
|
|
||||||
int *const outUnigramProbability) const;
|
|
||||||
|
|
||||||
int getTerminalPtNodePositionOfWord(const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
|
||||||
|
|
||||||
int getUnigramProbabilityOfPtNode(const int ptNodePos) const;
|
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
|
||||||
|
|
||||||
int getBigramsPositionOfPtNode(const int ptNodePos) const;
|
|
||||||
|
|
||||||
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
|
|
||||||
return &mHeaderPolicy;
|
|
||||||
}
|
|
||||||
|
|
||||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
|
||||||
return &mBigramListPolicy;
|
|
||||||
}
|
|
||||||
|
|
||||||
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
|
|
||||||
return &mShortcutListPolicy;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool addUnigramWord(const int *const word, const int length, const int probability,
|
|
||||||
const int timestamp) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool addBigramWords(const int *const word0, const int length0, const int *const word1,
|
|
||||||
const int length1, const int probability, const int timestamp) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool removeBigramWords(const int *const word0, const int length0, const int *const word1,
|
|
||||||
const int length1) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void flush(const char *const filePath) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
|
||||||
}
|
|
||||||
|
|
||||||
void flushWithGC(const char *const filePath) {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
|
|
||||||
}
|
|
||||||
|
|
||||||
bool needsToRunGC(const bool mindsBlockByGC) const {
|
|
||||||
// This method should not be called for non-updatable dictionary.
|
|
||||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void getProperty(const char *const query, const int queryLength, char *const outResult,
|
|
||||||
const int maxResultLength) {
|
|
||||||
// getProperty is not supported for this class.
|
|
||||||
if (maxResultLength > 0) {
|
|
||||||
outResult[0] = '\0';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
|
||||||
|
|
||||||
const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
|
|
||||||
const HeaderPolicy mHeaderPolicy;
|
|
||||||
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
|
|
||||||
DynamicShortcutListPolicy mShortcutListPolicy;
|
|
||||||
DynamicBigramListPolicy mBigramListPolicy;
|
|
||||||
DynamicPatriciaTrieNodeReader mNodeReader;
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
|
|
@ -17,7 +17,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||||
|
|
Loading…
Reference in New Issue