Merge "Make DictionaryStructurePolicy have buffer info."
commit
4571b0f682
|
@ -71,6 +71,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
suggest/core/policy/weighting.cpp \
|
suggest/core/policy/weighting.cpp \
|
||||||
suggest/core/session/dic_traverse_session.cpp \
|
suggest/core/session/dic_traverse_session.cpp \
|
||||||
$(addprefix suggest/policyimpl/dictionary/, \
|
$(addprefix suggest/policyimpl/dictionary/, \
|
||||||
|
dictionary_structure_with_buffer_policy_factory.cpp \
|
||||||
dynamic_patricia_trie_node_reader.cpp \
|
dynamic_patricia_trie_node_reader.cpp \
|
||||||
dynamic_patricia_trie_policy.cpp \
|
dynamic_patricia_trie_policy.cpp \
|
||||||
dynamic_patricia_trie_reading_utils.cpp \
|
dynamic_patricia_trie_reading_utils.cpp \
|
||||||
|
|
|
@ -20,11 +20,11 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
#include "suggest/core/layout/proximity_info_utils.h"
|
#include "suggest/core/layout/proximity_info_utils.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter {
|
class DicNodeProximityFilter : public DictionaryStructureWithBufferPolicy::NodeFilter {
|
||||||
public:
|
public:
|
||||||
DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
|
DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
|
||||||
const int pointIndex, const bool exactOnly)
|
const int pointIndex, const bool exactOnly)
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
#include "suggest/core/dictionary/multi_bigram_map.h"
|
#include "suggest/core/dictionary/multi_bigram_map.h"
|
||||||
#include "suggest/core/dictionary/probability_utils.h"
|
#include "suggest/core/dictionary/probability_utils.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -83,7 +83,7 @@ namespace latinime {
|
||||||
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
|
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
|
||||||
} else {
|
} else {
|
||||||
binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
|
binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
|
||||||
binaryDictionaryInfo, &childrenFilter, childDicNodes);
|
&childrenFilter, childDicNodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -116,9 +116,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
const int length = mBinaryDictionaryInfo->getStructurePolicy()->
|
const int length = mBinaryDictionaryInfo->getStructurePolicy()->
|
||||||
getCodePointsAndProbabilityAndReturnCodePointCount(
|
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
||||||
mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
|
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
|
||||||
bigramBuffer, &unigramProbability);
|
|
||||||
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
// Due to space constraints, the probability for bigrams is approximate - the lower the
|
||||||
// unigram probability, the worse the precision. The theoretical maximum error in
|
// unigram probability, the worse the precision. The theoretical maximum error in
|
||||||
// resulting probability is 8 - although in practice it's never bigger than 3 or 4
|
// resulting probability is 8 - although in practice it's never bigger than 3 or 4
|
||||||
|
@ -139,10 +138,9 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
if (0 >= prevWordLength) return NOT_A_DICT_POS;
|
||||||
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
prevWord, prevWordLength, forceLowerCaseSearch);
|
||||||
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
|
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
|
||||||
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(
|
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos);
|
||||||
mBinaryDictionaryInfo, pos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
|
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
|
||||||
|
@ -151,7 +149,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
|
||||||
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary or has no bigrams
|
||||||
if (NOT_A_DICT_POS == pos) return false;
|
if (NOT_A_DICT_POS == pos) return false;
|
||||||
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
word1, length1, false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
||||||
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
|
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
#include "utils/log_utils.h"
|
#include "utils/log_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -37,11 +37,16 @@ class BinaryDictionaryInfo {
|
||||||
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
||||||
mDictBuf, mDictSize)),
|
mDictBuf, mDictSize)),
|
||||||
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
||||||
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
|
// TODO: Remove.
|
||||||
mDictionaryFormat)) {
|
mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
|
||||||
|
::newDictionaryStructurePolicy(this)) {
|
||||||
logDictionaryInfo(env);
|
logDictionaryInfo(env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
~BinaryDictionaryInfo() {
|
||||||
|
delete mStructurePolicy;
|
||||||
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
|
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
|
||||||
return mDictBuf;
|
return mDictBuf;
|
||||||
}
|
}
|
||||||
|
@ -66,6 +71,7 @@ class BinaryDictionaryInfo {
|
||||||
return mDictionaryFormat;
|
return mDictionaryFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Move to DictionaryStructurePolicy.
|
||||||
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
|
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
|
||||||
return &mDictionaryHeader;
|
return &mDictionaryHeader;
|
||||||
}
|
}
|
||||||
|
@ -76,7 +82,8 @@ class BinaryDictionaryInfo {
|
||||||
return mIsUpdatable && isUpdatableDictionaryFormat;
|
return mIsUpdatable && isUpdatableDictionaryFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
|
// TODO: remove
|
||||||
|
AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const {
|
||||||
return mStructurePolicy;
|
return mStructurePolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,9 +96,12 @@ class BinaryDictionaryInfo {
|
||||||
const int mDictBufOffset;
|
const int mDictBufOffset;
|
||||||
const bool mIsUpdatable;
|
const bool mIsUpdatable;
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
|
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
|
||||||
|
// TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the
|
||||||
|
// DictionaryStructurePolicy.
|
||||||
const BinaryDictionaryHeader mDictionaryHeader;
|
const BinaryDictionaryHeader mDictionaryHeader;
|
||||||
const uint8_t *const mDictRoot;
|
const uint8_t *const mDictRoot;
|
||||||
const DictionaryStructurePolicy *const mStructurePolicy;
|
// TODO: remove
|
||||||
|
const DictionaryStructureWithBufferPolicy *const mStructurePolicy;
|
||||||
|
|
||||||
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
|
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
|
||||||
const int BUFFER_SIZE = 16;
|
const int BUFFER_SIZE = 16;
|
||||||
|
|
|
@ -83,14 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
const DictionaryStructurePolicy *const structurePolicy =
|
const DictionaryStructureWithBufferPolicy *const structurePolicy =
|
||||||
mBinaryDictionaryInfo.getStructurePolicy();
|
mBinaryDictionaryInfo.getStructurePolicy();
|
||||||
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
|
int pos = structurePolicy->getTerminalNodePositionOfWord(word, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_VALID_WORD_POS == pos) {
|
if (NOT_A_VALID_WORD_POS == pos) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
|
return structurePolicy->getUnigramProbability(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
|
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
|
||||||
|
|
|
@ -68,7 +68,7 @@ class MultiBigramMap {
|
||||||
|
|
||||||
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
|
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
|
||||||
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||||
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
|
getBigramsPositionOfNode(nodePos);
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
|
@ -108,7 +108,7 @@ class MultiBigramMap {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
|
||||||
const int nextWordPosition, const int unigramProbability) {
|
const int nextWordPosition, const int unigramProbability) {
|
||||||
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
|
||||||
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
|
getBigramsPositionOfNode(nodePos);
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BinaryDictionaryInfo;
|
|
||||||
class DicNode;
|
class DicNode;
|
||||||
class DicNodeVector;
|
class DicNodeVector;
|
||||||
|
|
||||||
|
@ -29,7 +28,7 @@ class DicNodeVector;
|
||||||
* This class abstracts structure of dictionaries.
|
* This class abstracts structure of dictionaries.
|
||||||
* Implement this policy to support additional dictionaries.
|
* Implement this policy to support additional dictionaries.
|
||||||
*/
|
*/
|
||||||
class DictionaryStructurePolicy {
|
class DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
// This provides a filtering method for filtering new node.
|
// This provides a filtering method for filtering new node.
|
||||||
class NodeFilter {
|
class NodeFilter {
|
||||||
|
@ -44,36 +43,31 @@ class DictionaryStructurePolicy {
|
||||||
DISALLOW_COPY_AND_ASSIGN(NodeFilter);
|
DISALLOW_COPY_AND_ASSIGN(NodeFilter);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
virtual ~DictionaryStructureWithBufferPolicy() {}
|
||||||
|
|
||||||
virtual int getRootPosition() const = 0;
|
virtual int getRootPosition() const = 0;
|
||||||
|
|
||||||
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
|
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
|
||||||
|
|
||||||
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const = 0;
|
int *const outUnigramProbability) const = 0;
|
||||||
|
|
||||||
virtual int getTerminalNodePositionOfWord(
|
virtual int getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const = 0;
|
const int length, const bool forceLowerCaseSearch) const = 0;
|
||||||
|
|
||||||
virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
virtual int getUnigramProbability(const int nodePos) const = 0;
|
||||||
const int nodePos) const = 0;
|
|
||||||
|
|
||||||
virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
|
||||||
const int nodePos) const = 0;
|
|
||||||
|
|
||||||
virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
|
||||||
const int nodePos) const = 0;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DictionaryStructurePolicy() {}
|
DictionaryStructureWithBufferPolicy() {}
|
||||||
virtual ~DictionaryStructurePolicy() {}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy);
|
DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
|
#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */
|
|
@ -37,12 +37,12 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
|
||||||
}
|
}
|
||||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||||
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
|
||||||
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
|
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
|
||||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
// auto-capitalized words like "The [current_word]".
|
// auto-capitalized words like "The [current_word]".
|
||||||
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ class DicTraverseSession {
|
||||||
const int maxPointerCount);
|
const int maxPointerCount);
|
||||||
void resetCache(const int nextActiveCacheSize, const int maxWords);
|
void resetCache(const int nextActiveCacheSize, const int maxWords);
|
||||||
|
|
||||||
// TODO: Remove
|
// TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo.
|
||||||
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
|
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
|
||||||
|
|
||||||
//--------------------
|
//--------------------
|
||||||
|
|
|
@ -215,7 +215,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
|
||||||
traverseSession->getBinaryDictionaryInfo();
|
traverseSession->getBinaryDictionaryInfo();
|
||||||
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
|
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
|
||||||
binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
|
binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
|
||||||
binaryDictionaryInfo, terminalDicNode->getPos()));
|
terminalDicNode->getPos()));
|
||||||
// Shortcut is not supported for multiple words suggestions.
|
// Shortcut is not supported for multiple words suggestions.
|
||||||
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
// TODO: Check shortcuts during traversal for multiple words suggestions.
|
||||||
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);
|
||||||
|
|
|
@ -1,48 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
|
||||||
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
|
||||||
|
|
||||||
#include "defines.h"
|
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
|
||||||
|
|
||||||
namespace latinime {
|
|
||||||
|
|
||||||
class DictionaryStructurePolicy;
|
|
||||||
|
|
||||||
class DictionaryStructurePolicyFactory {
|
|
||||||
public:
|
|
||||||
static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
|
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
|
|
||||||
switch (dictionaryFormat) {
|
|
||||||
case BinaryDictionaryFormatUtils::VERSION_2:
|
|
||||||
return PatriciaTriePolicy::getInstance();
|
|
||||||
case BinaryDictionaryFormatUtils::VERSION_3:
|
|
||||||
return DynamicPatriciaTriePolicy::getInstance();
|
|
||||||
default:
|
|
||||||
ASSERT(false);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
|
|
||||||
};
|
|
||||||
} // namespace latinime
|
|
||||||
#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
|
||||||
|
::newDictionaryStructurePolicy(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo) {
|
||||||
|
switch (binaryDictionaryInfo->getFormat()) {
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_2:
|
||||||
|
return new PatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
|
||||||
|
binaryDictionaryInfo);
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_3:
|
||||||
|
return new DynamicPatriciaTriePolicy(binaryDictionaryInfo->getDictRoot(),
|
||||||
|
binaryDictionaryInfo);
|
||||||
|
default:
|
||||||
|
ASSERT(false);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,37 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
|
||||||
|
#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class BinaryDictionaryInfo;
|
||||||
|
|
||||||
|
class DictionaryStructureWithBufferPolicyFactory {
|
||||||
|
public:
|
||||||
|
static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
|
|
@ -26,23 +26,21 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance;
|
|
||||||
// To avoid infinite loop caused by invalid or malicious forward links.
|
// To avoid infinite loop caused by invalid or malicious forward links.
|
||||||
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
int nextPos = dicNode->getChildrenPos();
|
int nextPos = dicNode->getChildrenPos();
|
||||||
int totalChildCount = 0;
|
int totalChildCount = 0;
|
||||||
do {
|
do {
|
||||||
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
||||||
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
mDictRoot, &nextPos);
|
||||||
totalChildCount += childCount;
|
totalChildCount += childCount;
|
||||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||||
// Invalid dictionary.
|
// Invalid dictionary.
|
||||||
|
@ -64,13 +62,11 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
|
||||||
}
|
}
|
||||||
nextPos = nodeReader.getSiblingNodePos();
|
nextPos = nodeReader.getSiblingNodePos();
|
||||||
}
|
}
|
||||||
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
|
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos);
|
||||||
binaryDictionaryInfo->getDictRoot(), nextPos);
|
|
||||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
|
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
|
@ -83,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
int mergedNodeCodePoints[maxCodePointCount];
|
int mergedNodeCodePoints[maxCodePointCount];
|
||||||
int codePointCount = 0;
|
int codePointCount = 0;
|
||||||
|
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
// First, read terminal node and get its probability.
|
// First, read terminal node and get its probability.
|
||||||
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
|
||||||
mergedNodeCodePoints);
|
mergedNodeCodePoints);
|
||||||
|
@ -118,8 +114,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
|
||||||
return codePointCount;
|
return codePointCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
|
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
int searchCodePoints[length];
|
int searchCodePoints[length];
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
|
@ -128,14 +123,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
int currentLength = 0;
|
int currentLength = 0;
|
||||||
int pos = getRootPosition();
|
int pos = getRootPosition();
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
while (currentLength <= length) {
|
while (currentLength <= length) {
|
||||||
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
// When foundMatchedNode becomes true, currentLength is increased at least once.
|
||||||
bool foundMatchedNode = false;
|
bool foundMatchedNode = false;
|
||||||
int totalChildCount = 0;
|
int totalChildCount = 0;
|
||||||
do {
|
do {
|
||||||
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
mDictRoot, &pos);
|
||||||
totalChildCount += childCount;
|
totalChildCount += childCount;
|
||||||
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
|
||||||
// Invalid dictionary.
|
// Invalid dictionary.
|
||||||
|
@ -183,7 +178,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
|
||||||
// If the matched node is not found in the current node group, try to follow the
|
// If the matched node is not found in the current node group, try to follow the
|
||||||
// forward link.
|
// forward link.
|
||||||
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
|
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
|
||||||
binaryDictionaryInfo->getDictRoot(), pos);
|
mDictRoot, pos);
|
||||||
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
|
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
|
||||||
if (!foundMatchedNode) {
|
if (!foundMatchedNode) {
|
||||||
// Matched node is not found.
|
// Matched node is not found.
|
||||||
|
@ -195,12 +190,11 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
|
||||||
return NOT_A_VALID_WORD_POS;
|
return NOT_A_VALID_WORD_POS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getUnigramProbability(
|
int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -208,13 +202,11 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(
|
||||||
return nodeReader.getProbability();
|
return nodeReader.getProbability();
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
|
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
@ -222,13 +214,11 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
|
||||||
return nodeReader.getShortcutPos();
|
return nodeReader.getShortcutPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(
|
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
|
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
|
||||||
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
nodeReader.fetchNodeInfoFromBuffer(nodePos);
|
||||||
if (nodeReader.isDeleted()) {
|
if (nodeReader.isDeleted()) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
|
|
@ -17,8 +17,10 @@
|
||||||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -26,45 +28,41 @@ class BinaryDictionaryInfo;
|
||||||
class DicNode;
|
class DicNode;
|
||||||
class DicNodeVector;
|
class DicNodeVector;
|
||||||
|
|
||||||
class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy {
|
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() {
|
DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
|
||||||
return &sInstance;
|
const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
||||||
}
|
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
|
||||||
|
|
||||||
|
~DynamicPatriciaTriePolicy() {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalNodePositionOfWord(
|
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const int length, const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getUnigramProbability(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getShortcutPositionOfNode(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getBigramsPositionOfNode(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
|
||||||
static const DynamicPatriciaTriePolicy sInstance;
|
|
||||||
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
|
||||||
|
|
||||||
DynamicPatriciaTriePolicy() {}
|
const uint8_t *const mDictRoot;
|
||||||
~DynamicPatriciaTriePolicy() {}
|
// TODO: remove
|
||||||
|
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
|
@ -27,48 +27,39 @@
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
|
|
||||||
|
|
||||||
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
||||||
if (!dicNode->hasChildren()) {
|
if (!dicNode->hasChildren()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int nextPos = dicNode->getChildrenPos();
|
int nextPos = dicNode->getChildrenPos();
|
||||||
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
|
||||||
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
mDictRoot, &nextPos);
|
||||||
for (int i = 0; i < childCount; i++) {
|
for (int i = 0; i < childCount; i++) {
|
||||||
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, nodeFilter, childDicNodes);
|
||||||
nodeFilter, childDicNodes);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const {
|
int *const outUnigramProbability) const {
|
||||||
return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
|
return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(mDictRoot, nodePos,
|
||||||
binaryDictionaryInfo->getDictRoot(), nodePos,
|
|
||||||
maxCodePointCount, outCodePoints, outUnigramProbability);
|
maxCodePointCount, outCodePoints, outUnigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getTerminalNodePositionOfWord(
|
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
|
return BinaryFormat::getTerminalPosition(mDictRoot, inWord,
|
||||||
length, forceLowerCaseSearch);
|
length, forceLowerCaseSearch);
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getUnigramProbability(
|
int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
|
||||||
int pos = nodePos;
|
int pos = nodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
|
@ -79,81 +70,74 @@ int PatriciaTriePolicy::getUnigramProbability(
|
||||||
// for shortcuts).
|
// for shortcuts).
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||||
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getShortcutPositionOfNode(
|
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
|
||||||
int pos = nodePos;
|
int pos = nodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
|
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
|
||||||
}
|
}
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getBigramsPositionOfNode(
|
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int nodePos) const {
|
|
||||||
if (nodePos == NOT_A_VALID_WORD_POS) {
|
if (nodePos == NOT_A_VALID_WORD_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
|
||||||
int pos = nodePos;
|
int pos = nodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
}
|
}
|
||||||
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
|
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
|
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||||
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
|
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
|
||||||
}
|
}
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
||||||
const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
const int nodePos, const NodeFilter *const childrenFilter,
|
||||||
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
|
DicNodeVector *childDicNodes) const {
|
||||||
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
|
|
||||||
int pos = nodePos;
|
int pos = nodePos;
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
|
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||||
dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
|
mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
|
||||||
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
|
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
|
||||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
|
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
|
||||||
: NOT_A_PROBABILITY;
|
: NOT_A_PROBABILITY;
|
||||||
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
|
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
|
||||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||||
dictRoot, flags, &pos) : NOT_A_DICT_POS;
|
mDictRoot, flags, &pos) : NOT_A_DICT_POS;
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||||
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
|
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
|
||||||
}
|
}
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||||
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
|
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
|
||||||
binaryDictionaryInfo, &pos);
|
mBinaryDictionaryInfo, &pos);
|
||||||
}
|
}
|
||||||
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
|
||||||
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,
|
||||||
|
|
|
@ -17,52 +17,53 @@
|
||||||
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
|
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
#define LATINIME_PATRICIA_TRIE_POLICY_H
|
#define LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class PatriciaTriePolicy : public DictionaryStructurePolicy {
|
class BinaryDictionaryInfo;
|
||||||
|
class DicNode;
|
||||||
|
class DicNodeVector;
|
||||||
|
|
||||||
|
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
public:
|
public:
|
||||||
static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
|
PatriciaTriePolicy(const uint8_t *const dictRoot,
|
||||||
return &sInstance;
|
const BinaryDictionaryInfo *const binaryDictionaryInfo)
|
||||||
}
|
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
|
||||||
|
|
||||||
|
~PatriciaTriePolicy() {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
int getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
|
||||||
int *const outUnigramProbability) const;
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
int getTerminalNodePositionOfWord(
|
int getTerminalNodePositionOfWord(const int *const inWord,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
|
||||||
const int length, const bool forceLowerCaseSearch) const;
|
const int length, const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getUnigramProbability(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getShortcutPositionOfNode(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
int getBigramsPositionOfNode(const int nodePos) const;
|
||||||
const int nodePos) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
|
||||||
static const PatriciaTriePolicy sInstance;
|
|
||||||
|
|
||||||
PatriciaTriePolicy() {}
|
const uint8_t *const mDictRoot;
|
||||||
~PatriciaTriePolicy() {}
|
// TODO: remove
|
||||||
|
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
|
||||||
|
|
||||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
|
||||||
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
Loading…
Reference in New Issue