Make DictionaryStructurePolicy have buffer info.

To support dictionaries in various formats, extendable
buffers, or multiple files.

Bug: 6669677
Change-Id: I203a5a4adc049a6322cfba4eacb4cb6715f8dfc2
main
Keisuke Kuroyanagi 2013-08-09 14:14:39 +09:00
parent 5c057b3241
commit e1ebef6124
18 changed files with 202 additions and 195 deletions

View File

@ -71,6 +71,7 @@ LATIN_IME_CORE_SRC_FILES := \
suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \
$(addprefix suggest/policyimpl/dictionary/, \
dictionary_structure_with_buffer_policy_factory.cpp \
dynamic_patricia_trie_node_reader.cpp \
dynamic_patricia_trie_policy.cpp \
dynamic_patricia_trie_reading_utils.cpp \

View File

@ -20,11 +20,11 @@
#include "defines.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/layout/proximity_info_utils.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
class DicNodeProximityFilter : public DictionaryStructurePolicy::NodeFilter {
class DicNodeProximityFilter : public DictionaryStructureWithBufferPolicy::NodeFilter {
public:
DicNodeProximityFilter(const ProximityInfoState *const pInfoState,
const int pointIndex, const bool exactOnly)

View File

@ -24,7 +24,7 @@
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
@ -83,7 +83,7 @@ namespace latinime {
DicNodeUtils::createAndGetPassingChildNode(dicNode, &childrenFilter, childDicNodes);
} else {
binaryDictionaryInfo->getStructurePolicy()->createAndGetAllChildNodes(dicNode,
binaryDictionaryInfo, &childrenFilter, childDicNodes);
&childrenFilter, childDicNodes);
}
}

View File

@ -116,9 +116,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
while (bigramsIt.hasNext()) {
bigramsIt.next();
const int length = mBinaryDictionaryInfo->getStructurePolicy()->
getCodePointsAndProbabilityAndReturnCodePointCount(
mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
bigramBuffer, &unigramProbability);
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
// Due to space constraints, the probability for bigrams is approximate - the lower the
// unigram probability, the worse the precision. The theoretical maximum error in
// resulting probability is 8 - although in practice it's never bigger than 3 or 4
@ -139,10 +138,9 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
const bool forceLowerCaseSearch) const {
if (0 >= prevWordLength) return NOT_A_DICT_POS;
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
prevWord, prevWordLength, forceLowerCaseSearch);
if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(
mBinaryDictionaryInfo, pos);
return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos);
}
bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@ -151,7 +149,7 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return false;
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
word1, length1, false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == nextWordPos) return false;
BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);

View File

@ -23,7 +23,7 @@
#include "jni.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/log_utils.h"
namespace latinime {
@ -37,11 +37,16 @@ class BinaryDictionaryInfo {
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
mDictBuf, mDictSize)),
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
mDictionaryFormat)) {
// TODO: Remove.
mStructurePolicy(DictionaryStructureWithBufferPolicyFactory
::newDictionaryStructurePolicy(this)) {
logDictionaryInfo(env);
}
// Releases the structure policy obtained from
// DictionaryStructureWithBufferPolicyFactory::newDictionaryStructurePolicy() in the
// constructor; that factory allocates the policy with new, so it must be deleted here.
~BinaryDictionaryInfo() {
delete mStructurePolicy;
}
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
return mDictBuf;
}
@ -66,6 +71,7 @@ class BinaryDictionaryInfo {
return mDictionaryFormat;
}
// TODO: Move to DictionaryStructurePolicy.
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
return &mDictionaryHeader;
}
@ -76,7 +82,8 @@ class BinaryDictionaryInfo {
return mIsUpdatable && isUpdatableDictionaryFormat;
}
AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
// TODO: remove
AK_FORCE_INLINE const DictionaryStructureWithBufferPolicy *getStructurePolicy() const {
return mStructurePolicy;
}
@ -89,9 +96,12 @@ class BinaryDictionaryInfo {
const int mDictBufOffset;
const bool mIsUpdatable;
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
// TODO: Move BinaryDictionaryHeader to policyimpl and introduce dedicated API to the
// DictionaryStructurePolicy.
const BinaryDictionaryHeader mDictionaryHeader;
const uint8_t *const mDictRoot;
const DictionaryStructurePolicy *const mStructurePolicy;
// TODO: remove
const DictionaryStructureWithBufferPolicy *const mStructurePolicy;
AK_FORCE_INLINE void logDictionaryInfo(JNIEnv *const env) const {
const int BUFFER_SIZE = 16;

View File

@ -83,14 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *outWords, int *freq
}
int Dictionary::getProbability(const int *word, int length) const {
const DictionaryStructurePolicy *const structurePolicy =
const DictionaryStructureWithBufferPolicy *const structurePolicy =
mBinaryDictionaryInfo.getStructurePolicy();
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
int pos = structurePolicy->getTerminalNodePositionOfWord(word, length,
false /* forceLowerCaseSearch */);
if (NOT_A_VALID_WORD_POS == pos) {
return NOT_A_PROBABILITY;
}
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
return structurePolicy->getUnigramProbability(pos);
}
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {

View File

@ -68,7 +68,7 @@ class MultiBigramMap {
void init(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) {
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
@ -108,7 +108,7 @@ class MultiBigramMap {
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
const int bigramsListPos = binaryDictionaryInfo->getStructurePolicy()->
getBigramsPositionOfNode(binaryDictionaryInfo, nodePos);
getBigramsPositionOfNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(binaryDictionaryInfo, bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();

View File

@ -21,7 +21,6 @@
namespace latinime {
class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
@ -29,7 +28,7 @@ class DicNodeVector;
* This class abstracts structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
class DictionaryStructurePolicy {
class DictionaryStructureWithBufferPolicy {
public:
// This provides a filtering method for filtering new node.
class NodeFilter {
@ -44,36 +43,31 @@ class DictionaryStructurePolicy {
DISALLOW_COPY_AND_ASSIGN(NodeFilter);
};
virtual ~DictionaryStructureWithBufferPolicy() {}
virtual int getRootPosition() const = 0;
virtual void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const = 0;
virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const = 0;
virtual int getTerminalNodePositionOfWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
virtual int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const = 0;
virtual int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const = 0;
virtual int getUnigramProbability(const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const = 0;
virtual int getShortcutPositionOfNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const = 0;
virtual int getBigramsPositionOfNode(const int nodePos) const = 0;
protected:
DictionaryStructurePolicy() {}
virtual ~DictionaryStructurePolicy() {}
DictionaryStructureWithBufferPolicy() {}
private:
DISALLOW_COPY_AND_ASSIGN(DictionaryStructurePolicy);
DISALLOW_COPY_AND_ASSIGN(DictionaryStructureWithBufferPolicy);
};
} // namespace latinime
#endif /* LATINIME_DICTIONARY_STRUCTURE_POLICY_H */

View File

@ -37,12 +37,12 @@ void DicTraverseSession::init(const Dictionary *const dictionary, const int *pre
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
binaryDictionaryInfo, prevWord, prevWordLength, false /* forceLowerCaseSearch */);
prevWord, prevWordLength, false /* forceLowerCaseSearch */);
if (mPrevWordPos == NOT_A_VALID_WORD_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
mPrevWordPos = binaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
binaryDictionaryInfo, prevWord, prevWordLength, true /* forceLowerCaseSearch */);
prevWord, prevWordLength, true /* forceLowerCaseSearch */);
}
}

View File

@ -75,7 +75,7 @@ class DicTraverseSession {
const int maxPointerCount);
void resetCache(const int nextActiveCacheSize, const int maxWords);
// TODO: Remove
// TODO: Use DictionaryStructurePolicy instead of BinaryDictionaryInfo.
const BinaryDictionaryInfo *getBinaryDictionaryInfo() const;
//--------------------

View File

@ -215,7 +215,7 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen
traverseSession->getBinaryDictionaryInfo();
const TerminalAttributes terminalAttributes(traverseSession->getBinaryDictionaryInfo(),
binaryDictionaryInfo->getStructurePolicy()->getShortcutPositionOfNode(
binaryDictionaryInfo, terminalDicNode->getPos()));
terminalDicNode->getPos()));
// Shortcut is not supported for multiple words suggestions.
// TODO: Check shortcuts during traversal for multiple words suggestions.
const bool sameAsTyped = TRAVERSAL->sameAsTyped(traverseSession, terminalDicNode);

View File

@ -1,48 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
namespace latinime {
class DictionaryStructurePolicy;
// Maps a binary dictionary format version to the DictionaryStructurePolicy that can read it.
// The policies are obtained through each policy class's getInstance(), so nothing is
// allocated here and callers do not delete the returned pointer.
class DictionaryStructurePolicyFactory {
public:
// Returns the policy for dictionaryFormat:
//   VERSION_2 -> PatriciaTriePolicy::getInstance()
//   VERSION_3 -> DynamicPatriciaTriePolicy::getInstance()
// Any other value trips ASSERT(false) and yields a null pointer (0).
static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
switch (dictionaryFormat) {
case BinaryDictionaryFormatUtils::VERSION_2:
return PatriciaTriePolicy::getInstance();
case BinaryDictionaryFormatUtils::VERSION_3:
return DynamicPatriciaTriePolicy::getInstance();
default:
// Unsupported or undetected format version.
ASSERT(false);
return 0;
}
}
private:
// Only the static method above is meant to be used.
// NOTE(review): presumably this macro forbids constructing/copying the factory - confirm
// against defines.h.
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/dictionary_structure_with_buffer_policy_factory.h"
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
namespace latinime {
// Creates the structure policy matching the format of the given dictionary.
//
// VERSION_2 dictionaries get a PatriciaTriePolicy and VERSION_3 dictionaries get a
// DynamicPatriciaTriePolicy; both are constructed with the dictionary root buffer and the
// BinaryDictionaryInfo itself. The returned object is allocated with new, so the caller is
// responsible for deleting it. For any other format the ASSERT fires and 0 is returned.
/* static */ DictionaryStructureWithBufferPolicy *DictionaryStructureWithBufferPolicyFactory
        ::newDictionaryStructurePolicy(
                const BinaryDictionaryInfo *const binaryDictionaryInfo) {
    // Stays null unless a supported format version is matched below.
    DictionaryStructureWithBufferPolicy *createdPolicy = 0;
    switch (binaryDictionaryInfo->getFormat()) {
        case BinaryDictionaryFormatUtils::VERSION_2:
            createdPolicy = new PatriciaTriePolicy(
                    binaryDictionaryInfo->getDictRoot(), binaryDictionaryInfo);
            break;
        case BinaryDictionaryFormatUtils::VERSION_3:
            createdPolicy = new DynamicPatriciaTriePolicy(
                    binaryDictionaryInfo->getDictRoot(), binaryDictionaryInfo);
            break;
        default:
            // Unknown format version: flag it in debug builds and fall through to return 0.
            ASSERT(false);
            break;
    }
    return createdPolicy;
}
} // namespace latinime

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
#define LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
class BinaryDictionaryInfo;
// Factory for DictionaryStructureWithBufferPolicy objects. Unlike a shared-instance lookup,
// each call creates a separate policy bound to one dictionary.
class DictionaryStructureWithBufferPolicyFactory {
public:
// Allocates (with new) the policy that matches binaryDictionaryInfo's format and returns it;
// the caller owns the result and must delete it. Returns 0 when the format is not one of the
// supported versions.
static DictionaryStructureWithBufferPolicy *newDictionaryStructurePolicy(
const BinaryDictionaryInfo *const binaryDictionaryInfo);
private:
// NOTE(review): presumably this macro forbids constructing/copying the factory - confirm
// against defines.h.
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructureWithBufferPolicyFactory);
};
} // namespace latinime
#endif // LATINIME_DICTIONARY_STRUCTURE_WITH_BUFFER_POLICY_FACTORY_H

View File

@ -26,23 +26,21 @@
namespace latinime {
const DynamicPatriciaTriePolicy DynamicPatriciaTriePolicy::sInstance;
// To avoid infinite loop caused by invalid or malicious forward links.
const int DynamicPatriciaTriePolicy::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int nextPos = dicNode->getChildrenPos();
int totalChildCount = 0;
do {
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos);
mDictRoot, &nextPos);
totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
@ -64,13 +62,11 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d
}
nextPos = nodeReader.getSiblingNodePos();
}
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
binaryDictionaryInfo->getDictRoot(), nextPos);
nextPos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(mDictRoot, nextPos);
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(nextPos));
}
int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
@ -83,7 +79,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
int mergedNodeCodePoints[maxCodePointCount];
int codePointCount = 0;
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
// First, read terminal node and get its probability.
nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount,
mergedNodeCodePoints);
@ -118,8 +114,7 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun
return codePointCount;
}
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
int searchCodePoints[length];
for (int i = 0; i < length; ++i) {
@ -128,14 +123,14 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
int mergedNodeCodePoints[MAX_WORD_LENGTH];
int currentLength = 0;
int pos = getRootPosition();
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
while (currentLength <= length) {
// When foundMatchedNode becomes true, currentLength is increased at least once.
bool foundMatchedNode = false;
int totalChildCount = 0;
do {
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &pos);
mDictRoot, &pos);
totalChildCount += childCount;
if (childCount <= 0 || totalChildCount > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP) {
// Invalid dictionary.
@ -183,7 +178,7 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
// If the matched node is not found in the current node group, try to follow the
// forward link.
pos = DynamicPatriciaTrieReadingUtils::getForwardLinkPosition(
binaryDictionaryInfo->getDictRoot(), pos);
mDictRoot, pos);
} while (DynamicPatriciaTrieReadingUtils::isValidForwardLinkPosition(pos));
if (!foundMatchedNode) {
// Matched node is not found.
@ -195,12 +190,11 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(
return NOT_A_VALID_WORD_POS;
}
int DynamicPatriciaTriePolicy::getUnigramProbability(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) {
return NOT_A_PROBABILITY;
@ -208,13 +202,11 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(
return nodeReader.getProbability();
}
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;
@ -222,13 +214,11 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(
return nodeReader.getShortcutPos();
}
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
DynamicPatriciaTrieNodeReader nodeReader(binaryDictionaryInfo);
DynamicPatriciaTrieNodeReader nodeReader(mBinaryDictionaryInfo);
nodeReader.fetchNodeInfoFromBuffer(nodePos);
if (nodeReader.isDeleted()) {
return NOT_A_DICT_POS;

View File

@ -17,8 +17,10 @@
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#define LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
@ -26,45 +28,41 @@ class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
class DynamicPatriciaTriePolicy : public DictionaryStructurePolicy {
class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
static AK_FORCE_INLINE const DynamicPatriciaTriePolicy *getInstance() {
return &sInstance;
}
DynamicPatriciaTriePolicy(const uint8_t *const dictRoot,
const BinaryDictionaryInfo *const binaryDictionaryInfo)
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
~DynamicPatriciaTriePolicy() {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getUnigramProbability(const int nodePos) const;
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getBigramsPositionOfNode(const int nodePos) const;
private:
DISALLOW_COPY_AND_ASSIGN(DynamicPatriciaTriePolicy);
static const DynamicPatriciaTriePolicy sInstance;
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
DynamicPatriciaTriePolicy() {}
~DynamicPatriciaTriePolicy() {}
const uint8_t *const mDictRoot;
// TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H

View File

@ -27,48 +27,39 @@
namespace latinime {
const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
return;
}
int nextPos = dicNode->getChildrenPos();
const int childCount = PatriciaTrieReadingUtils::getGroupCountAndAdvancePosition(
binaryDictionaryInfo->getDictRoot(), &nextPos);
mDictRoot, &nextPos);
for (int i = 0; i < childCount; i++) {
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
nodeFilter, childDicNodes);
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, nodeFilter, childDicNodes);
}
}
int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const {
return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(
binaryDictionaryInfo->getDictRoot(), nodePos,
return BinaryFormat::getCodePointsAndProbabilityAndReturnCodePointCount(mDictRoot, nodePos,
maxCodePointCount, outCodePoints, outUnigramProbability);
}
int PatriciaTriePolicy::getTerminalNodePositionOfWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
int PatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const {
return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
return BinaryFormat::getTerminalPosition(mDictRoot, inWord,
length, forceLowerCaseSearch);
}
int PatriciaTriePolicy::getUnigramProbability(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
int PatriciaTriePolicy::getUnigramProbability(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_PROBABILITY;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
return NOT_A_PROBABILITY;
}
@ -79,81 +70,74 @@ int PatriciaTriePolicy::getUnigramProbability(
// for shortcuts).
return NOT_A_PROBABILITY;
}
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
return PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
int PatriciaTriePolicy::getShortcutPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
int PatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
return NOT_A_DICT_POS;
}
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
}
return pos;
}
int PatriciaTriePolicy::getBigramsPositionOfNode(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const {
int PatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const {
if (nodePos == NOT_A_VALID_WORD_POS) {
return NOT_A_DICT_POS;
}
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
return NOT_A_DICT_POS;
}
PatriciaTrieReadingUtils::skipCharacters(dictRoot, flags, MAX_WORD_LENGTH, &pos);
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(dictRoot, flags, &pos);
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
}
return pos;
}
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
const int nodePos, const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const childrenFilter, DicNodeVector *childDicNodes) const {
const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
const int nodePos, const NodeFilter *const childrenFilter,
DicNodeVector *childDicNodes) const {
int pos = nodePos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictRoot, &pos);
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int mergedNodeCodePoints[MAX_WORD_LENGTH];
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
dictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictRoot, &pos)
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
: NOT_A_PROBABILITY;
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
dictRoot, flags, &pos) : NOT_A_DICT_POS;
mDictRoot, flags, &pos) : NOT_A_DICT_POS;
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(binaryDictionaryInfo, &pos);
BinaryDictionaryTerminalAttributesReadingUtils::skipShortcuts(mBinaryDictionaryInfo, &pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
BinaryDictionaryTerminalAttributesReadingUtils::skipExistingBigrams(
binaryDictionaryInfo, &pos);
mBinaryDictionaryInfo, &pos);
}
if (!childrenFilter->isFilteredOut(mergedNodeCodePoints[0])) {
childDicNodes->pushLeavingChild(dicNode, nodePos, childrenPos, probability,

View File

@ -17,52 +17,53 @@
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
#define LATINIME_PATRICIA_TRIE_POLICY_H
#include <stdint.h>
#include "defines.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
class PatriciaTriePolicy : public DictionaryStructurePolicy {
class BinaryDictionaryInfo;
class DicNode;
class DicNodeVector;
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
return &sInstance;
}
PatriciaTriePolicy(const uint8_t *const dictRoot,
const BinaryDictionaryInfo *const binaryDictionaryInfo)
: mDictRoot(dictRoot), mBinaryDictionaryInfo(binaryDictionaryInfo) {}
~PatriciaTriePolicy() {}
AK_FORCE_INLINE int getRootPosition() const {
return 0;
}
void createAndGetAllChildNodes(const DicNode *const dicNode,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
int getCodePointsAndProbabilityAndReturnCodePointCount(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
int *const outUnigramProbability) const;
int getTerminalNodePositionOfWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
int getTerminalNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const;
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getUnigramProbability(const int nodePos) const;
int getShortcutPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getShortcutPositionOfNode(const int nodePos) const;
int getBigramsPositionOfNode(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int nodePos) const;
int getBigramsPositionOfNode(const int nodePos) const;
private:
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
static const PatriciaTriePolicy sInstance;
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
PatriciaTriePolicy() {}
~PatriciaTriePolicy() {}
const uint8_t *const mDictRoot;
// TODO: remove
const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int nodePos,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
};
} // namespace latinime