Make DictionaryStructurePolicy have buffer info.

This is needed to support dictionaries in various formats, extendable
buffers, or multiple files.

Bug: 6669677
Change-Id: I203a5a4adc049a6322cfba4eacb4cb6715f8dfc2
main
Keisuke Kuroyanagi 2013-08-09 14:14:39 +09:00
parent 5c057b3241
commit e1ebef6124
18 changed files with 202 additions and 195 deletions

View File

@ -71,6 +71,7 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \ suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \ suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \ $(addprefix suggest/policyimpl/dictionary/, \
dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_node_reader.cpp \ dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \ dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_utils.cpp \ dynamic_patricia_trie_reading_utils.cpp \

View File

@ -20,11 +20,11 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/layout/proximity_info_utils.h" #include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/dictionary_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime { namespace latinime {
class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter { class DicNodeProximityFilter : public DictionaryStructureWithBufferPolicy::NodeFilter {
public: public:
DicNodeProximityFilter(const ProximityInfoState *const pInfoState, DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
const int pointIndex, const bool exactOnly) const int pointIndex, const bool exactOnly)

View File

@ -24,7 +24,7 @@
#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h" #include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h" #include "utils/char_utils.h"
namespace latinime { namespace latinime {
@ -83,7 +83,7 @@ namespace latinime {
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes); DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else { } else {
binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode, binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
binaryDictionaryInfo, &childrenFilter, childDicNodes); &childrenFilter, childDicNodes);
} }
} }

View File

@ -116,9 +116,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
const int length = mBinaryDictionaryInfo->getStructurePolicy()-> const int length = mBinaryDictionaryInfo->getStructurePolicy()->
getCodePointsAndProbabilityAndReturnCodePointCount( getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
bigramBuffer, &unigramProbability);
// Due to space constraints, the probability for bigrams is approximate - the lower the // Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoretical maximum error in // unigram probability, the worse the precision. The theoretical maximum error in
// resulting probability is 8 - although in practice it's never bigger than 3 or 4 // resulting probability is 8 - although in practice it's never bigger than 3 or 4
@ -139,10 +138,9 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
const bool forceLowerCaseSearch) const { const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS; if (0 >= prevWordLength) return NOT_A_DICT_POS;
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch); prevWord, prevWordLength, forceLowerCaseSearch);
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode( return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos);
mBinaryDictionaryInfo, pos);
} }
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@ -151,7 +149,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return false; if (NOT_A_DICT_POS == pos) return false;
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */); word1, length1, false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == nextWordPos) return false; if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);

View File

@ -23,7 +23,7 @@
#include "jni.h" #include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h" #include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/log_utils.h" #include "utils/log_utils.h"
namespace latinime { namespace latinime {
@ -37,11 +37,16 @@ class BinaryDictionaryInfo {
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion( mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)), mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()), mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy( // TODO: Remove.
mDictionaryFormat)) { mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
::newDictionaryStructurePolicy(this)) {
logDictionaryInfo(env); logDictionaryInfo(env);
} }
~BinaryDictionaryInfo() {
delete mStructurePolicy;
}
AK_FORCE_INLINE const uint8_t *getDictBuf() const { AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf; return mDictBuf;
} }
@ -66,6 +71,7 @@ class BinaryDictionaryInfo {
return mDictionaryFormat; return mDictionaryFormat;
} }
// TODO: Move to DictionaryStructurePolicy.
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
return &mDictionaryHeader; return &mDictionaryHeader;
} }
@ -76,7 +82,8 @@ class BinaryDictionaryInfo {
return mIsUpdatable && isUpdatableDictionaryFormat; return mIsUpdatable && isUpdatableDictionaryFormat;
} }
AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const { // TODO: remove
AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const {
return mStructurePolicy; return mStructurePolicy;
} }
@ -89,9 +96,12 @@ class BinaryDictionaryInfo {
const int mDictBufOffset; const int mDictBufOffset;
const bool mIsUpdatable; const bool mIsUpdatable;
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat; const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
// TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the
// DictionaryStructurePolicy.
const BinaryDictionaryHeader mDictionaryHeader; const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot; const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy; // TODO: remove
const DictionaryStructureWithBufferPolicy *const mStructurePolicy;
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const { AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
const int BUFFER_SIZE = 16; const int BUFFER_SIZE = 16;

View File

@ -83,14 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
} }
int Dictionary::getProbability(const int *word, int length) const { int Dictionary::getProbability(const int *word, int length) const {
const DictionaryStructurePolicy *const structurePolicy = const DictionaryStructureWithBufferPolicy *const structurePolicy =
mBinaryDictionaryInfo.getStructurePolicy(); mBinaryDictionaryInfo.getStructurePolicy();
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length, int pos = structurePolicy->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == pos) { if (NOT_A_VALID_WORD_POS == pos) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos); return structurePolicy->getUnigramProbability(pos);
} }
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {

View File

@ -68,7 +68,7 @@ class MultiBigramMap {
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) { void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
@ -108,7 +108,7 @@ class MultiBigramMap {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
const int nextWordPosition, const int unigramProbability) { const int nextWordPosition, const int unigramProbability) {
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()-> const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos); getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos); BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();

View File

@ -21,7 +21,6 @@
namespace latinime { namespace latinime {
class BinaryDictionaryInfo;
class DicNode; class DicNode;
class DicNodeVector; class DicNodeVector;
@ -29,7 +28,7 @@ class DicNodeVector;
* This class abstracts structure of dictionaries. * This class abstracts structure of dictionaries.
* Implement this policy to support additional dictionaries. * Implement this policy to support additional dictionaries.
*/ */
class DictionaryStructurePolicy { class DictionaryStructureWithBufferPolicy {
public: public:
// This provides a filtering method for filtering new node. // This provides a filtering method for filtering new node.
class NodeFilter { class NodeFilter {
@ -44,36 +43,31 @@ class DictionaryStructurePolicy {
DISALLOW_COPY_AND_ASSIGN(NodeFilter); DISALLOW_COPY_AND_ASSIGN(NodeFilter);
}; };
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0; virtual int getRootPosition() const = 0;
virtual void createAndGetAllChildNodes(const DicNode *const dicNode, virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount( virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints, const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0; int *const outUnigramProbability) const = 0;
virtual int getTerminalNodePositionOfWord( virtual int getTerminalNodePositionOfWord(const int *const inWord,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0; const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, virtual int getUnigramProbability(const int nodePos) const = 0;
const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
const int nodePos) const = 0;
virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
const int nodePos) const = 0;
protected: protected:
DictionaryStructurePolicy() {} DictionaryStructureWithBufferPolicy() {}
virtual ~DictionaryStructurePolicy() {}
private: private:
DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy); DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */ #endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */

View File

@ -37,12 +37,12 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
} }
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */); prevWord, prevWordLength, false /* forceLowerCaseSearch */);
if (mPrevWordPos == NOT_A_VALID_WORD_POS) { if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for // Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]". // auto-capitalized words like "The [current_word]".
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */); prevWord, prevWordLength, true /* forceLowerCaseSearch */);
} }
} }

View File

@ -75,7 +75,7 @@ class DicTraverseSession {
const int maxPointerCount); const int maxPointerCount);
void resetCache(const int nextActiveCacheSize, const int maxWords); void resetCache(const int nextActiveCacheSize, const int maxWords);
// TODO: Remove // TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo.
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
//-------------------- //--------------------

View File

@ -215,7 +215,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
traverseSession->getBinaryDictionaryInfo(); traverseSession->getBinaryDictionaryInfo();
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(), const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode( binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
binaryDictionaryInfo, terminalDicNode->getPos())); terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions. // Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions. // TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode); const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);

View File

@ -1,48 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
namespace latinime {
class DictionaryStructurePolicy;
/**
 * Maps a binary dictionary format version to the structure policy that reads it.
 *
 * The returned pointers come from each policy class's getInstance(); this factory does
 * not allocate anything itself.
 */
class DictionaryStructurePolicyFactory {
 public:
    /**
     * Looks up the structure policy for the given dictionary format.
     *
     * @param dictionaryFormat format version of the dictionary to be read.
     * @return PatriciaTriePolicy::getInstance() for VERSION_2,
     *         DynamicPatriciaTriePolicy::getInstance() for VERSION_3; for any other value
     *         ASSERT(false) is evaluated and 0 is returned.
     */
    static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
            const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
        if (dictionaryFormat == BinaryDictionaryFormatUtils::VERSION_2) {
            return PatriciaTriePolicy::getInstance();
        }
        if (dictionaryFormat == BinaryDictionaryFormatUtils::VERSION_3) {
            return DynamicPatriciaTriePolicy::getInstance();
        }
        // Unknown format: flag it and hand back a null policy.
        ASSERT(false);
        return 0;
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
namespace latinime {
/**
 * Creates the structure policy that matches the format of the given dictionary.
 *
 * The policy is allocated with `new` and receives both the dictionary root pointer and
 * the BinaryDictionaryInfo itself, so the caller is responsible for deleting the result.
 *
 * @param binaryDictionaryInfo dictionary whose format selects the policy implementation.
 * @return a new PatriciaTriePolicy for VERSION_2, a new DynamicPatriciaTriePolicy for
 *         VERSION_3; for any other format ASSERT(false) is evaluated and 0 is returned.
 */
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
        ::newDictionaryStructurePolicy(
                const BinaryDictionaryInfo *const binaryDictionaryInfo) {
    const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion =
            binaryDictionaryInfo->getFormat();
    if (formatVersion == BinaryDictionaryFormatUtils::VERSION_2) {
        return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
                binaryDictionaryInfo);
    }
    if (formatVersion == BinaryDictionaryFormatUtils::VERSION_3) {
        return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
                binaryDictionaryInfo);
    }
    // Unsupported format: flag it and return a null policy.
    ASSERT(false);
    return 0;
}
} // namespace latinime

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
class BinaryDictionaryInfo;
// Factory that creates DictionaryStructureWithBufferPolicy objects for a dictionary.
// Exposes a single static creation method.
class DictionaryStructureWithBufferPolicyFactory {
public:
// Returns a structure policy chosen from the format of binaryDictionaryInfo.
// The implementation allocates the policy with `new`, so the caller must delete it.
// It returns 0 when the dictionary format is not a supported version.
static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy(
const BinaryDictionaryInfo *const binaryDictionaryInfo);
private:
// NOTE(review): presumably prevents instantiation and copying of this static-only
// class; the macro is not defined in this file - confirm against its definition.
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H

View File

@ -26,23 +26,21 @@
namespace latinime { namespace latinime {
const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance;
// To avoid infinite loop caused by invalid or malicious forward links. // To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos(); int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0; int totalChildCount = 0;
do { do {
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos); mDictRoot, &nextPos);
totalChildCount += childCount; totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary. // Invalid dictionary.
@ -64,13 +62,11 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
} }
nextPos = nodeReader.getSiblingNodePos(); nextPos = nodeReader.getSiblingNodePos();
} }
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos);
binaryDictionaryInfo->getDictRoot(), nextPos);
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos)); } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
} }
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints, const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
@ -83,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount]; int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0; int codePointCount = 0;
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
// First, read terminal node and get its probability. // First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
@ -118,8 +114,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
return codePointCount; return codePointCount;
} }
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord( int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length]; int searchCodePoints[length];
for (int i = 0; i < length; ++i) { for (int i = 0; i < length; ++i) {
@ -128,14 +123,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0; int currentLength = 0;
int pos = getRootPosition(); int pos = getRootPosition();
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
while (currentLength <= length) { while (currentLength <= length) {
// When foundMatchedNode becomes true, currentLength is increased at least once. // When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false; bool foundMatchedNode = false;
int totalChildCount = 0; int totalChildCount = 0;
do { do {
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &pos); mDictRoot, &pos);
totalChildCount += childCount; totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) { if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary. // Invalid dictionary.
@ -183,7 +178,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
// If the matched node is not found in the current node group, try to follow the // If the matched node is not found in the current node group, try to follow the
// forward link. // forward link.
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition( pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
binaryDictionaryInfo->getDictRoot(), pos); mDictRoot, pos);
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos)); } while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
if (!foundMatchedNode) { if (!foundMatchedNode) {
// Matched node is not found. // Matched node is not found.
@ -195,12 +190,11 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
return NOT_A_VALID_WORD_POS; return NOT_A_VALID_WORD_POS;
} }
int DynamicPatriciaTriePolicy::getUnigramProbability( int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
@ -208,13 +202,11 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(
return nodeReader.getProbability(); return nodeReader.getProbability();
} }
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode( int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
@ -222,13 +214,11 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
return nodeReader.getShortcutPos(); return nodeReader.getShortcutPos();
} }
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode( int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo); DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos); nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) { if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;

View File

@ -17,8 +17,10 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime { namespace latinime {
@ -26,45 +28,41 @@ class BinaryDictionaryInfo;
class DicNode; class DicNode;
class DicNodeVector; class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy { class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() { DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
return &sInstance; const BinaryDictionaryInfo *const binaryDictionaryInfo)
} : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
~DynamicPatriciaTriePolicy() {}
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
} }
void createAndGetAllChildNodes(const DicNode *const dicNode, void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount( int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const; int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord( int getTerminalNodePositionOfWord(const int *const inWord,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
const int length, const bool forceLowerCaseSearch) const; const int length, const bool forceLowerCaseSearch) const;
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getUnigramProbability(const int nodePos) const;
const int nodePos) const;
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getShortcutPositionOfNode(const int nodePos) const;
const int nodePos) const;
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getBigramsPositionOfNode(const int nodePos) const;
const int nodePos) const;
private: private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const DynamicPatriciaTriePolicy sInstance;
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
DynamicPatriciaTriePolicy() {} const uint8_t *const mDictRoot;
~DynamicPatriciaTriePolicy() {} // TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -27,48 +27,39 @@
namespace latinime { namespace latinime {
const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode, void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const { const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) { if (!dicNode->hasChildren()) {
return; return;
} }
int nextPos = dicNode->getChildrenPos(); int nextPos = dicNode->getChildrenPos();
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition( const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos); mDictRoot, &nextPos);
for (int i = 0; i < childCount; i++) { for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo, nextPos = createAndGetLeavingChildNode(dicNode, nextPos, nodeFilter, childDicNodes);
nodeFilter, childDicNodes);
} }
} }
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints, const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const { int *const outUnigramProbability) const {
return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount( return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(mDictRoot, nodePos,
binaryDictionaryInfo->getDictRoot(), nodePos,
maxCodePointCount, outCodePoints, outUnigramProbability); maxCodePointCount, outCodePoints, outUnigramProbability);
} }
int PatriciaTriePolicy::getTerminalNodePositionOfWord( int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord, return BinaryFormat::getTerminalPosition(mDictRoot, inWord,
length, forceLowerCaseSearch); length, forceLowerCaseSearch);
} }
int PatriciaTriePolicy::getUnigramProbability( int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos; int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) { if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
@ -79,81 +70,74 @@ int PatriciaTriePolicy::getUnigramProbability(
// for shortcuts). // for shortcuts).
return NOT_A_PROBABILITY; return NOT_A_PROBABILITY;
} }
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
} }
int PatriciaTriePolicy::getShortcutPositionOfNode( int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos; int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
} }
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
} }
return pos; return pos;
} }
int PatriciaTriePolicy::getBigramsPositionOfNode( int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) { if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos; int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) { if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
} }
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos); PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) { if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
} }
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos); PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
} }
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
} }
return pos; return pos;
} }
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos, const NodeFilter *const childrenFilter,
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const { DicNodeVector *childDicNodes) const {
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos; int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags = const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos); PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH]; int mergedNodeCodePoints[MAX_WORD_LENGTH];
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos); mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))? const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos) PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
: NOT_A_PROBABILITY; : NOT_A_PROBABILITY;
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ? const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
dictRoot, flags, &pos) : NOT_A_DICT_POS; mDictRoot, flags, &pos) : NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos); BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
} }
if (PatriciaTrieReadingUtils::hasBigrams(flags)) { if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams( BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
binaryDictionaryInfo, &pos); mBinaryDictionaryInfo, &pos);
} }
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) { if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability, childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,

View File

@ -17,52 +17,53 @@
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H #ifndef LATINIME_PATRICIA_TRIE_POLICY_H
#define LATINIME_PATRICIA_TRIE_POLICY_H #define LATINIME_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime { namespace latinime {
class PatriciaTriePolicy : public DictionaryStructurePolicy { class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public: public:
static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() { PatriciaTriePolicy(const uint8_t *const dictRoot,
return &sInstance; const BinaryDictionaryInfo *const binaryDictionaryInfo)
} : mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
~PatriciaTriePolicy() {}
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
} }
void createAndGetAllChildNodes(const DicNode *const dicNode, void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount( int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const; int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord( int getTerminalNodePositionOfWord(const int *const inWord,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
const int length, const bool forceLowerCaseSearch) const; const int length, const bool forceLowerCaseSearch) const;
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getUnigramProbability(const int nodePos) const;
const int nodePos) const;
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getShortcutPositionOfNode(const int nodePos) const;
const int nodePos) const;
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo, int getBigramsPositionOfNode(const int nodePos) const;
const int nodePos) const;
private: private:
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
static const PatriciaTriePolicy sInstance;
PatriciaTriePolicy() {} const uint8_t *const mDictRoot;
~PatriciaTriePolicy() {} // TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const; const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
}; };
} // namespace latinime } // namespace latinime