am 14f5cec0: Merge "Move bigrams iteration methods to policy."
* commit '14f5cec07b173a2de924f495b2929a795967a494': Move bigrams iteration methods to policy.
This commit is contained in:
commit
119258153e
16 changed files with 190 additions and 77 deletions
|
@ -118,8 +118,8 @@ namespace latinime {
|
|||
return ProbabilityUtils::backoff(unigramProbability);
|
||||
}
|
||||
if (multiBigramMap) {
|
||||
return multiBigramMap->getBigramProbability(
|
||||
binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability);
|
||||
return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(),
|
||||
prevWordPos, wordPos, unigramProbability);
|
||||
}
|
||||
return ProbabilityUtils::backoff(unigramProbability);
|
||||
}
|
||||
|
|
|
@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
|||
int bigramCount = 0;
|
||||
int unigramProbability = 0;
|
||||
int bigramBuffer[MAX_WORD_LENGTH];
|
||||
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
||||
const DictionaryStructureWithBufferPolicy *const structurePolicy =
|
||||
mBinaryDictionaryInfo->getStructurePolicy();
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
const int length = mBinaryDictionaryInfo->getStructurePolicy()->
|
||||
const int length = structurePolicy->
|
||||
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
||||
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
|
||||
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
||||
|
@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
|||
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||
const bool forceLowerCaseSearch) const {
|
||||
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
||||
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||
prevWord, prevWordLength, forceLowerCaseSearch);
|
||||
const DictionaryStructureWithBufferPolicy *const structurePolicy =
|
||||
mBinaryDictionaryInfo->getStructurePolicy();
|
||||
int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
|
||||
forceLowerCaseSearch);
|
||||
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
|
||||
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos);
|
||||
return structurePolicy->getBigramsPositionOfNode(pos);
|
||||
}
|
||||
|
||||
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
|
||||
|
@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
|
|||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
|
||||
if (NOT_A_DICT_POS == pos) return false;
|
||||
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||
word1, length1, false /* forceLowerCaseSearch */);
|
||||
const DictionaryStructureWithBufferPolicy *const structurePolicy =
|
||||
mBinaryDictionaryInfo->getStructurePolicy();
|
||||
int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
||||
|
||||
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == nextWordPos) {
|
||||
|
|
|
@ -18,51 +18,41 @@
|
|||
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BinaryDictionaryBigramsIterator {
|
||||
public:
|
||||
BinaryDictionaryBigramsIterator(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
|
||||
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
|
||||
mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {}
|
||||
const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
|
||||
: mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),
|
||||
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
||||
mHasNext(pos != NOT_A_DICT_POS) {}
|
||||
|
||||
AK_FORCE_INLINE bool hasNext() const {
|
||||
return mHasNext;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE void next() {
|
||||
mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
|
||||
mBinaryDictionaryInfo, &mPos);
|
||||
mBigramPos =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
|
||||
mBinaryDictionaryInfo, mBigramFlags, &mPos);
|
||||
mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
|
||||
mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getProbability() const {
|
||||
return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
|
||||
mBigramFlags);
|
||||
return mProbability;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getBigramPos() const {
|
||||
return mBigramPos;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int getFlags() const {
|
||||
return mBigramFlags;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);
|
||||
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy;
|
||||
int mPos;
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
|
||||
int mBigramPos;
|
||||
int mProbability;
|
||||
bool mHasNext;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/byte_array_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
|
|||
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
|
||||
|
||||
/* static */ int TaUtils::getBigramAddressAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
|
||||
const uint8_t *const dictRoot, const TerminalAttributeFlags flags,
|
||||
int *const pos) {
|
||||
int offset = 0;
|
||||
const int origin = *pos;
|
||||
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
offset = ByteArrayUtils::readUint8AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
|
||||
break;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
offset = ByteArrayUtils::readUint16AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos);
|
||||
break;
|
||||
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
offset = ByteArrayUtils::readUint24AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos);
|
||||
break;
|
||||
}
|
||||
if (isOffsetNegative(flags)) {
|
||||
|
|
|
@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
typedef TerminalAttributeFlags ShortcutFlags;
|
||||
|
||||
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(
|
||||
binaryDictionaryInfo->getDictRoot(), pos);
|
||||
const uint8_t *const dictRoot, int *const pos) {
|
||||
return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
|
||||
}
|
||||
|
||||
static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
|
||||
|
@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
|
|||
|
||||
// Bigrams reading methods
|
||||
static AK_FORCE_INLINE void skipExistingBigrams(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
|
||||
BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
|
||||
const uint8_t *const dictRoot, int *const pos) {
|
||||
BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos);
|
||||
while (hasNext(flags)) {
|
||||
*pos += attributeAddressSize(flags);
|
||||
flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
|
||||
flags = getFlagsAndForwardPointer(dictRoot, pos);
|
||||
}
|
||||
*pos += attributeAddressSize(flags);
|
||||
}
|
||||
|
||||
static int getBigramAddressAndForwardPointer(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
|
||||
int *const pos);
|
||||
const uint8_t *const dictRoot, const BigramFlags flags, int *const pos);
|
||||
|
||||
// Shortcuts reading methods
|
||||
// This method returns the size of the shortcut list region excluding the shortcut list size
|
||||
|
|
|
@ -21,9 +21,9 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/bloom_filter.h"
|
||||
#include "suggest/core/dictionary/probability_utils.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "utils/hash_map_compat.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -38,7 +38,7 @@ class MultiBigramMap {
|
|||
|
||||
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
||||
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
||||
int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
|
||||
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
|
||||
mBigramMaps.find(wordPosition);
|
||||
|
@ -46,12 +46,12 @@ class MultiBigramMap {
|
|||
return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability);
|
||||
}
|
||||
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
|
||||
addBigramsForWordPosition(binaryDictionaryInfo, wordPosition);
|
||||
addBigramsForWordPosition(structurePolicy, wordPosition);
|
||||
return mBigramMaps[wordPosition].getBigramProbability(
|
||||
nextWordPosition, unigramProbability);
|
||||
}
|
||||
return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo,
|
||||
wordPosition, nextWordPosition, unigramProbability);
|
||||
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
|
||||
nextWordPosition, unigramProbability);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
|
@ -66,10 +66,11 @@ class MultiBigramMap {
|
|||
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
|
||||
~BigramMap() {}
|
||||
|
||||
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
|
||||
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||
getBigramsPositionOfNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||
const int nodePos) {
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||
bigramsListPos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
||||
|
@ -100,16 +101,16 @@ class MultiBigramMap {
|
|||
};
|
||||
|
||||
AK_FORCE_INLINE void addBigramsForWordPosition(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) {
|
||||
mBigramMaps[position].init(binaryDictionaryInfo, position);
|
||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
|
||||
mBigramMaps[position].init(structurePolicy, position);
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
|
||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
||||
const int nextWordPosition, const int unigramProbability) {
|
||||
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||
getBigramsPositionOfNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||
bigramsListPos);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == nextWordPosition) {
|
||||
|
|
|
@ -49,7 +49,7 @@ class TerminalAttributes {
|
|||
bool *const outIsWhitelist) {
|
||||
const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
|
||||
mBinaryDictionaryInfo, &mPos);
|
||||
mBinaryDictionaryInfo->getDictRoot(), &mPos);
|
||||
mHasNextShortcutTarget =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
|
||||
if (outIsWhitelist) {
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
|
||||
#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
/*
|
||||
* This class abstracts structure of bigrams.
|
||||
*/
|
||||
// Abstract interface for walking a bigram list through an int cursor, so that callers such as
// BinaryDictionaryBigramsIterator do not read the underlying bytes themselves.
class DictionaryBigramsStructurePolicy {
|
||||
public:
|
||||
virtual ~DictionaryBigramsStructurePolicy() {}
|
||||
|
||||
// Reads the bigram entry at *pos. Results are returned through the out-parameters:
// outBigramPos - position of the bigram's target word (callers compare it with terminal
// node positions), outProbability - probability of the entry, outHasNext - whether another
// entry follows in the same list. In BigramListPolicy, *pos is moved forward past the entry
// that was read.
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
|
||||
bool *const outHasNext, int *const pos) const = 0;
|
||||
// Moves *pos forward past the bigram list that starts at *pos without reporting its entries
// (BigramListPolicy does this by delegating to skipExistingBigrams).
virtual void skipAllBigrams(int *const pos) const = 0;
|
||||
|
||||
protected:
|
||||
// Protected: only subclasses can construct this interface.
DictionaryBigramsStructurePolicy() {}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy);
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */
|
|
@ -23,6 +23,7 @@ namespace latinime {
|
|||
|
||||
class DicNode;
|
||||
class DicNodeVector;
|
||||
class DictionaryBigramsStructurePolicy;
|
||||
|
||||
/*
|
||||
* This class abstracts structure of dictionaries.
|
||||
|
@ -63,6 +64,8 @@ class DictionaryStructureWithBufferPolicy {
|
|||
|
||||
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
|
||||
|
||||
virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
|
||||
|
||||
protected:
|
||||
DictionaryStructureWithBufferPolicy() {}
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_BIGRAM_LIST_POLICY_H
|
||||
#define LATINIME_BIGRAM_LIST_POLICY_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
// TODO: Move bigrams reading methods to policyimpl.
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// DictionaryBigramsStructurePolicy implementation that reads bigram entries from a byte buffer
// by delegating to BinaryDictionaryTerminalAttributesReadingUtils.
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||
public:
|
||||
// Stores the buffer pointer as given; nothing is copied here.
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
|
||||
|
||||
~BigramListPolicy() {}
|
||||
|
||||
// Reads one bigram entry at *pos: first the flags byte, then the bigram address whose width
// is selected by those flags. Both reads move *pos forward. The probability and the
// "has next" indicator are both derived from the same flags value.
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
|
||||
int *const pos) const {
|
||||
const BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags flags =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
|
||||
mBigramsBuf, pos);
|
||||
*outBigramPos =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
|
||||
mBigramsBuf, flags, pos);
|
||||
*outProbability =
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(flags);
|
||||
*outHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
|
||||
}
|
||||
|
||||
// Moves *pos past every entry of the bigram list starting at *pos.
void skipAllBigrams(int *const pos) const {
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(mBigramsBuf, pos);
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
|
||||
|
||||
// Buffer the bigram entries are read from. PatriciaTriePolicy and DynamicPatriciaTriePolicy
// pass their dictRoot pointer here.
const uint8_t *const mBigramsBuf;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_BIGRAM_LIST_POLICY_H
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -56,8 +57,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
|
|||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
|
||||
mBigramPos = pos;
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
|
||||
mBinaryDictionaryInfo, &pos);
|
||||
mBigramsPolicy->skipAllBigrams(&pos);
|
||||
} else {
|
||||
mBigramPos = NOT_A_DICT_POS;
|
||||
}
|
||||
|
|
|
@ -17,13 +17,17 @@
|
|||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
|
||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class BinaryDictionaryInfo;
|
||||
class DictionaryBigramsStructurePolicy;
|
||||
|
||||
/*
|
||||
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
|
||||
|
@ -31,10 +35,11 @@ class BinaryDictionaryInfo;
|
|||
*/
|
||||
class DynamicPatriciaTrieNodeReader {
|
||||
public:
|
||||
explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
||||
: mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS),
|
||||
mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0),
|
||||
mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS),
|
||||
DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||
const DictionaryBigramsStructurePolicy *const bigramsPolicy)
|
||||
: mBinaryDictionaryInfo(binaryDictionaryInfo), mBigramsPolicy(bigramsPolicy),
|
||||
mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
|
||||
mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS),
|
||||
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
|
||||
mSiblingPos(NOT_A_VALID_WORD_POS) {}
|
||||
|
||||
|
@ -117,6 +122,7 @@ class DynamicPatriciaTrieNodeReader {
|
|||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
|
||||
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
|
||||
int mNodePos;
|
||||
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
|
||||
int mParentPos;
|
||||
|
|
|
@ -34,7 +34,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
|||
if (!dicNode->hasChildren()) {
|
||||
return;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
int nextPos = dicNode->getChildrenPos();
|
||||
int totalChildCount = 0;
|
||||
|
@ -79,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
|||
int mergedNodeCodePoints[maxCodePointCount];
|
||||
int codePointCount = 0;
|
||||
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
// First, read terminal node and get its probability.
|
||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
|
@ -123,7 +123,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
|||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
int currentLength = 0;
|
||||
int pos = getRootPosition();
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
while (currentLength <= length) {
|
||||
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
||||
bool foundMatchedNode = false;
|
||||
|
@ -194,7 +194,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
|
|||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -206,7 +206,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
|
|||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
if (nodeReader.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
|
@ -218,7 +218,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
|
|||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
|
||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||
if (nodeReader.isDeleted()) {
|
||||
return NOT_A_DICT_POS;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -32,7 +33,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
public:
|
||||
DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
||||
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
|
||||
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
|
||||
mBigramListPolicy(dictRoot) {}
|
||||
|
||||
~DynamicPatriciaTriePolicy() {}
|
||||
|
||||
|
@ -56,6 +58,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getBigramsPositionOfNode(const int nodePos) const;
|
||||
|
||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
||||
return &mBigramListPolicy;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||
|
@ -63,6 +69,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
const uint8_t *const mDictRoot;
|
||||
// TODO: remove
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
const BigramListPolicy mBigramListPolicy;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||
|
|
|
@ -136,8 +136,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
|||
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
|
||||
mBinaryDictionaryInfo, &pos);
|
||||
getBigramsStructurePolicy()->skipAllBigrams(&pos);
|
||||
}
|
||||
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -32,7 +33,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
public:
|
||||
PatriciaTriePolicy(const uint8_t *const dictRoot,
|
||||
const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
||||
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
|
||||
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
|
||||
mBigramListPolicy(dictRoot) {}
|
||||
|
||||
~PatriciaTriePolicy() {}
|
||||
|
||||
|
@ -56,12 +58,17 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getBigramsPositionOfNode(const int nodePos) const;
|
||||
|
||||
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
|
||||
return &mBigramListPolicy;
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
||||
|
||||
const uint8_t *const mDictRoot;
|
||||
// TODO: remove
|
||||
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||
const BigramListPolicy mBigramListPolicy;
|
||||
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||
|
|
Loading…
Reference in a new issue