Move bigrams iteration methods to policy.

To support various bigram formats.

Bug: 6669677
Change-Id: Ifc8c1a855b03cd5a39d97a6e10872ef8ef76475b
main
Keisuke Kuroyanagi 2013-08-09 20:20:34 +09:00
parent 7ec9db2c34
commit 668870be43
16 changed files with 190 additions and 77 deletions

View File

@ -118,8 +118,8 @@ namespace latinime {
return ProbabilityUtils::backoff(unigramProbability); return ProbabilityUtils::backoff(unigramProbability);
} }
if (multiBigramMap) { if (multiBigramMap) {
return multiBigramMap->getBigramProbability( return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(),
binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); prevWordPos, wordPos, unigramProbability);
} }
return ProbabilityUtils::backoff(unigramProbability); return ProbabilityUtils::backoff(unigramProbability);
} }

View File

@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int bigramCount = 0; int bigramCount = 0;
int unigramProbability = 0; int unigramProbability = 0;
int bigramBuffer[MAX_WORD_LENGTH]; int bigramBuffer[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); const DictionaryStructureWithBufferPolicy *const structurePolicy =
mBinaryDictionaryInfo->getStructurePolicy();
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
const int length = mBinaryDictionaryInfo->getStructurePolicy()-> const int length = structurePolicy->
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
// Due to space constraints, the probability for bigrams is approximate - the lower the // Due to space constraints, the probability for bigrams is approximate - the lower the
@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
const bool forceLowerCaseSearch) const { const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS; if (0 >= prevWordLength) return NOT_A_DICT_POS;
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( const DictionaryStructureWithBufferPolicy *const structurePolicy =
prevWord, prevWordLength, forceLowerCaseSearch); mBinaryDictionaryInfo->getStructurePolicy();
int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
forceLowerCaseSearch);
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos); return structurePolicy->getBigramsPositionOfNode(pos);
} }
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return false; if (NOT_A_DICT_POS == pos) return false;
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( const DictionaryStructureWithBufferPolicy *const structurePolicy =
word1, length1, false /* forceLowerCaseSearch */); mBinaryDictionaryInfo->getStructurePolicy();
int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == nextWordPos) return false; if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos) { if (bigramsIt.getBigramPos() == nextWordPos) {

View File

@ -18,51 +18,41 @@
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
namespace latinime { namespace latinime {
class BinaryDictionaryBigramsIterator { class BinaryDictionaryBigramsIterator {
public: public:
BinaryDictionaryBigramsIterator( BinaryDictionaryBigramsIterator(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos) const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
: mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0), : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),
mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {} mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mHasNext(pos != NOT_A_DICT_POS) {}
AK_FORCE_INLINE bool hasNext() const { AK_FORCE_INLINE bool hasNext() const {
return mHasNext; return mHasNext;
} }
AK_FORCE_INLINE void next() { AK_FORCE_INLINE void next() {
mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos);
mBinaryDictionaryInfo, &mPos);
mBigramPos =
BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
mBinaryDictionaryInfo, mBigramFlags, &mPos);
mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
} }
AK_FORCE_INLINE int getProbability() const { AK_FORCE_INLINE int getProbability() const {
return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( return mProbability;
mBigramFlags);
} }
AK_FORCE_INLINE int getBigramPos() const { AK_FORCE_INLINE int getBigramPos() const {
return mBigramPos; return mBigramPos;
} }
AK_FORCE_INLINE int getFlags() const {
return mBigramFlags;
}
private: private:
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator); DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);
const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy;
int mPos; int mPos;
BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
int mBigramPos; int mBigramPos;
int mProbability;
bool mHasNext; bool mHasNext;
}; };
} // namespace latinime } // namespace latinime

View File

@ -16,7 +16,6 @@
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/byte_array_utils.h" #include "suggest/core/dictionary/byte_array_utils.h"
namespace latinime { namespace latinime {
@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;
/* static */ int TaUtils::getBigramAddressAndForwardPointer( /* static */ int TaUtils::getBigramAddressAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, const uint8_t *const dictRoot, const TerminalAttributeFlags flags,
int *const pos) { int *const pos) {
int offset = 0; int offset = 0;
const int origin = *pos; const int origin = *pos;
switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
offset = ByteArrayUtils::readUint8AndAdvancePosition( offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
binaryDictionaryInfo->getDictRoot(), pos);
break; break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
offset = ByteArrayUtils::readUint16AndAdvancePosition( offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos);
binaryDictionaryInfo->getDictRoot(), pos);
break; break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
offset = ByteArrayUtils::readUint24AndAdvancePosition( offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos);
binaryDictionaryInfo->getDictRoot(), pos);
break; break;
} }
if (isOffsetNegative(flags)) { if (isOffsetNegative(flags)) {

View File

@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
typedef TerminalAttributeFlags ShortcutFlags; typedef TerminalAttributeFlags ShortcutFlags;
static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { const uint8_t *const dictRoot, int *const pos) {
return ByteArrayUtils::readUint8AndAdvancePosition( return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
binaryDictionaryInfo->getDictRoot(), pos);
} }
static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
// Bigrams reading methods // Bigrams reading methods
static AK_FORCE_INLINE void skipExistingBigrams( static AK_FORCE_INLINE void skipExistingBigrams(
const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { const uint8_t *const dictRoot, int *const pos) {
BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos);
while (hasNext(flags)) { while (hasNext(flags)) {
*pos += attributeAddressSize(flags); *pos += attributeAddressSize(flags);
flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); flags = getFlagsAndForwardPointer(dictRoot, pos);
} }
*pos += attributeAddressSize(flags); *pos += attributeAddressSize(flags);
} }
static int getBigramAddressAndForwardPointer( static int getBigramAddressAndForwardPointer(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, const uint8_t *const dictRoot, const BigramFlags flags, int *const pos);
int *const pos);
// Shortcuts reading methods // Shortcuts reading methods
// This method returns the size of the shortcut list region excluding the shortcut list size // This method returns the size of the shortcut list region excluding the shortcut list size

View File

@ -21,9 +21,9 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/hash_map_compat.h" #include "utils/hash_map_compat.h"
namespace latinime { namespace latinime {
@ -38,7 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps. // Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already. // Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability) { const int wordPosition, const int nextWordPosition, const int unigramProbability) {
hash_map_compat<int, BigramMap>::const_iterator mapPosition = hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition); mBigramMaps.find(wordPosition);
@ -46,12 +46,12 @@ class MultiBigramMap {
return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability); return mapPosition->second.getBigramProbability(nextWordPosition, unigramProbability);
} }
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
addBigramsForWordPosition(binaryDictionaryInfo, wordPosition); addBigramsForWordPosition(structurePolicy, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability( return mBigramMaps[wordPosition].getBigramProbability(
nextWordPosition, unigramProbability); nextWordPosition, unigramProbability);
} }
return readBigramProbabilityFromBinaryDictionary(binaryDictionaryInfo, return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
wordPosition, nextWordPosition, unigramProbability); nextWordPosition, unigramProbability);
} }
void clear() { void clear() {
@ -66,10 +66,11 @@ class MultiBigramMap {
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
~BigramMap() {} ~BigramMap() {}
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) { void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> const int nodePos) {
getBigramsPositionOfNode(nodePos); const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
@ -100,16 +101,16 @@ class MultiBigramMap {
}; };
AK_FORCE_INLINE void addBigramsForWordPosition( AK_FORCE_INLINE void addBigramsForWordPosition(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int position) { const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
mBigramMaps[position].init(binaryDictionaryInfo, position); mBigramMaps[position].init(structurePolicy, position);
} }
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) { const int nextWordPosition, const int unigramProbability) {
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> const int bigramsListPos = structurePolicy->getBigramsPositionOfNode(nodePos);
getBigramsPositionOfNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) { if (bigramsIt.getBigramPos() == nextWordPosition) {

View File

@ -49,7 +49,7 @@ class TerminalAttributes {
bool *const outIsWhitelist) { bool *const outIsWhitelist) {
const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags = const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
mBinaryDictionaryInfo, &mPos); mBinaryDictionaryInfo->getDictRoot(), &mPos);
mHasNextShortcutTarget = mHasNextShortcutTarget =
BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags); BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
if (outIsWhitelist) { if (outIsWhitelist) {

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
#define LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H
#include "defines.h"
namespace latinime {
/*
* This class abstracts the structure of bigrams: how a list of bigram entries is read and
* skipped. Callers such as BinaryDictionaryBigramsIterator walk bigram entries through this
* interface instead of decoding a particular bigram format themselves.
*/
class DictionaryBigramsStructurePolicy {
public:
virtual ~DictionaryBigramsStructurePolicy() {}
// Reads the bigram entry located at *pos.
//   outBigramPos   - receives the position associated with the bigram that was read.
//   outProbability - receives the probability of that bigram.
//   outHasNext     - receives whether another bigram entry follows this one.
//   pos            - in/out reading position; on return it has been moved past the entry
//                    that was read, so the next call continues with the following entry.
virtual void getNextBigram(int *const outBigramPos, int *const outProbability,
bool *const outHasNext, int *const pos) const = 0;
// Moves *pos past the bigram entries that start at *pos without reporting their contents.
virtual void skipAllBigrams(int *const pos) const = 0;
protected:
// Protected so that only concrete policies deriving from this interface can be constructed.
DictionaryBigramsStructurePolicy() {}
private:
DISALLOW_COPY_AND_ASSIGN(DictionaryBigramsStructurePolicy);
};
} // namespace latinime
#endif /* LATINIME_DICTIONARY_BIGRAMS_STRUCTURE_POLICY_H */

View File

@ -23,6 +23,7 @@ namespace latinime {
class DicNode; class DicNode;
class DicNodeVector; class DicNodeVector;
class DictionaryBigramsStructurePolicy;
/* /*
* This class abstracts structure of dictionaries. * This class abstracts structure of dictionaries.
@ -63,6 +64,8 @@ class DictionaryStructureWithBufferPolicy {
virtual int getBigramsPositionOfNode(const int nodePos) const = 0; virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
protected: protected:
DictionaryStructureWithBufferPolicy() {} DictionaryStructureWithBufferPolicy() {}

View File

@ -0,0 +1,58 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_BIGRAM_LIST_POLICY_H
#define LATINIME_BIGRAM_LIST_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
// TODO: Move bigrams reading methods to policyimpl.
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
namespace latinime {
class BigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
explicit BigramListPolicy(const uint8_t *const bigramsBuf) : mBigramsBuf(bigramsBuf) {}
~BigramListPolicy() {}
void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
int *const pos) const {
const BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags flags =
BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
mBigramsBuf, pos);
*outBigramPos =
BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
mBigramsBuf, flags, pos);
*outProbability =
BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(flags);
*outHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
}
void skipAllBigrams(int *const pos) const {
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(mBigramsBuf, pos);
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(BigramListPolicy);
const uint8_t *const mBigramsBuf;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_LIST_POLICY_H

View File

@ -18,6 +18,7 @@
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
namespace latinime { namespace latinime {
@ -56,8 +57,7 @@ void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(c
} }
if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) { if (PatriciaTrieReadingUtils::hasBigrams(mFlags)) {
mBigramPos = pos; mBigramPos = pos;
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( mBigramsPolicy->skipAllBigrams(&pos);
mBinaryDictionaryInfo, &pos);
} else { } else {
mBigramPos = NOT_A_DICT_POS; mBigramPos = NOT_A_DICT_POS;
} }

View File

@ -17,13 +17,17 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_NODE_READER_H
#include <stdint.h>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
namespace latinime { namespace latinime {
class BinaryDictionaryInfo; class BinaryDictionaryInfo;
class DictionaryBigramsStructurePolicy;
/* /*
* This class is used for helping to read nodes of dynamic patricia trie. This class handles moved * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved
@ -31,10 +35,11 @@ class BinaryDictionaryInfo;
*/ */
class DynamicPatriciaTrieNodeReader { class DynamicPatriciaTrieNodeReader {
public: public:
explicit DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo) DynamicPatriciaTrieNodeReader(const BinaryDictionaryInfo *const binaryDictionaryInfo,
: mBinaryDictionaryInfo(binaryDictionaryInfo), mNodePos(NOT_A_VALID_WORD_POS), const DictionaryBigramsStructurePolicy *const bigramsPolicy)
mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), : mBinaryDictionaryInfo(binaryDictionaryInfo), mBigramsPolicy(bigramsPolicy),
mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS),
mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS),
mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramPos(NOT_A_DICT_POS),
mSiblingPos(NOT_A_VALID_WORD_POS) {} mSiblingPos(NOT_A_VALID_WORD_POS) {}
@ -117,6 +122,7 @@ class DynamicPatriciaTrieNodeReader {
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader); DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTrieNodeReader);
const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const DictionaryBigramsStructurePolicy *const mBigramsPolicy;
int mNodePos; int mNodePos;
DynamicPatriciaTrieReadingUtils::NodeFlags mFlags; DynamicPatriciaTrieReadingUtils::NodeFlags mFlags;
int mParentPos; int mParentPos;

View File

@ -34,7 +34,7 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos(); int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0; int totalChildCount = 0;
@ -79,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount]; int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0; int codePointCount = 0;
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
// First, read terminal node and get its probability. // First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
@ -123,7 +123,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0; int currentLength = 0;
int pos = getRootPosition(); int pos = getRootPosition();
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
while (currentLength <= length) { while (currentLength <= length) {
// When foundMatchedNode becomes true, currentLength is increased at least once. // When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false; bool foundMatchedNode = false;
@ -194,7 +194,7 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -206,7 +206,7 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
@ -218,7 +218,7 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo, getBigramsStructurePolicy());
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;

View File

@ -21,6 +21,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
namespace latinime { namespace latinime {
@ -32,7 +33,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
DynamicPatriciaTriePolicy(const uint8_t *const dictRoot, DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
const BinaryDictionaryInfo *const binaryDictionaryInfo) const BinaryDictionaryInfo *const binaryDictionaryInfo)
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
mBigramListPolicy(dictRoot) {}
~DynamicPatriciaTriePolicy() {} ~DynamicPatriciaTriePolicy() {}
@ -56,6 +58,10 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getBigramsPositionOfNode(const int nodePos) const; int getBigramsPositionOfNode(const int nodePos) const;
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
@ -63,6 +69,7 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
// TODO: remove // TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const BigramListPolicy mBigramListPolicy;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -136,8 +136,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos); BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
} }
if (PatriciaTrieReadingUtils::hasBigrams(flags)) { if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( getBigramsStructurePolicy()->skipAllBigrams(&pos);
mBinaryDictionaryInfo, &pos);
} }
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,

View File

@ -21,6 +21,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/bigrams/bigram_list_policy.h"
namespace latinime { namespace latinime {
@ -32,7 +33,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
PatriciaTriePolicy(const uint8_t *const dictRoot, PatriciaTriePolicy(const uint8_t *const dictRoot,
const BinaryDictionaryInfo *const binaryDictionaryInfo) const BinaryDictionaryInfo *const binaryDictionaryInfo)
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {} : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo),
mBigramListPolicy(dictRoot) {}
~PatriciaTriePolicy() {} ~PatriciaTriePolicy() {}
@ -56,12 +58,17 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getBigramsPositionOfNode(const int nodePos) const; int getBigramsPositionOfNode(const int nodePos) const;
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
// TODO: remove // TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
const BigramListPolicy mBigramListPolicy;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;