Merge "Introduce patriciaTrie to abstract traversing version 2 dictionary."
This commit is contained in:
commit
2a7f8d1a55
8 changed files with 198 additions and 32 deletions
|
@ -70,6 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
proximity_info_state_utils.cpp) \
|
proximity_info_state_utils.cpp) \
|
||||||
suggest/core/policy/weighting.cpp \
|
suggest/core/policy/weighting.cpp \
|
||||||
suggest/core/session/dic_traverse_session.cpp \
|
suggest/core/session/dic_traverse_session.cpp \
|
||||||
|
suggest/policyimpl/dictionary/patricia_trie_policy.cpp \
|
||||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||||
$(addprefix suggest/policyimpl/typing/, \
|
$(addprefix suggest/policyimpl/typing/, \
|
||||||
scoring_params.cpp \
|
scoring_params.cpp \
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "suggest/core/dictionary/probability_utils.h"
|
#include "suggest/core/dictionary/probability_utils.h"
|
||||||
#include "suggest/core/layout/proximity_info.h"
|
#include "suggest/core/layout/proximity_info.h"
|
||||||
#include "suggest/core/layout/proximity_info_state.h"
|
#include "suggest/core/layout/proximity_info_state.h"
|
||||||
|
#include "suggest/core/policy/dictionary_structure_policy.h"
|
||||||
#include "utils/char_utils.h"
|
#include "utils/char_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -36,14 +37,15 @@ namespace latinime {
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const int prevWordNodePos, DicNode *const newRootNode) {
|
const int prevWordNodePos, DicNode *const newRootNode) {
|
||||||
newRootNode->initAsRoot(binaryDictionaryInfo->getRootPosition(), prevWordNodePos);
|
newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
|
||||||
|
prevWordNodePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
||||||
newRootNode->initAsRootWithPreviousWord(
|
newRootNode->initAsRootWithPreviousWord(
|
||||||
prevWordLastNode, binaryDictionaryInfo->getRootPosition());
|
prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
||||||
|
|
|
@ -150,11 +150,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
|
||||||
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
|
||||||
const bool forceLowerCaseSearch) const {
|
const bool forceLowerCaseSearch) const {
|
||||||
if (0 >= prevWordLength) return 0;
|
if (0 >= prevWordLength) return 0;
|
||||||
const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
|
int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
|
mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
|
||||||
forceLowerCaseSearch);
|
|
||||||
|
|
||||||
if (NOT_VALID_WORD == pos) return 0;
|
if (NOT_VALID_WORD == pos) return 0;
|
||||||
|
const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
|
||||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
|
if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
|
||||||
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
|
||||||
|
@ -189,8 +188,8 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
|
||||||
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||||
if (0 == pos) return false;
|
if (0 == pos) return false;
|
||||||
int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
|
int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
|
||||||
word1, length1, false /* forceLowerCaseSearch */);
|
mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
|
||||||
if (NOT_VALID_WORD == nextWordPos) return false;
|
if (NOT_VALID_WORD == nextWordPos) return false;
|
||||||
|
|
||||||
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
|
||||||
|
|
|
@ -22,11 +22,10 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
#include "suggest/core/dictionary/binary_dictionary_header.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class BinaryDictionaryHeader;
|
|
||||||
|
|
||||||
class BinaryDictionaryInfo {
|
class BinaryDictionaryInfo {
|
||||||
public:
|
public:
|
||||||
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
|
BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
|
||||||
|
@ -35,7 +34,9 @@ class BinaryDictionaryInfo {
|
||||||
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
|
mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
|
||||||
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
|
||||||
mDictBuf, mDictSize)),
|
mDictBuf, mDictSize)),
|
||||||
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {}
|
mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
|
||||||
|
mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
|
||||||
|
mDictionaryFormat)) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
|
AK_FORCE_INLINE const uint8_t *getDictBuf() const {
|
||||||
return mDictBuf;
|
return mDictBuf;
|
||||||
|
@ -61,10 +62,6 @@ class BinaryDictionaryInfo {
|
||||||
return mDictionaryFormat;
|
return mDictionaryFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
|
AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
|
||||||
return &mDictionaryHeader;
|
return &mDictionaryHeader;
|
||||||
}
|
}
|
||||||
|
@ -75,6 +72,10 @@ class BinaryDictionaryInfo {
|
||||||
return mIsUpdatable && isUpdatableDictionaryFormat;
|
return mIsUpdatable && isUpdatableDictionaryFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
|
||||||
|
return mStructurePolicy;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
|
DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);
|
||||||
|
|
||||||
|
@ -86,6 +87,7 @@ class BinaryDictionaryInfo {
|
||||||
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
|
const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
|
||||||
const BinaryDictionaryHeader mDictionaryHeader;
|
const BinaryDictionaryHeader mDictionaryHeader;
|
||||||
const uint8_t *const mDictRoot;
|
const uint8_t *const mDictRoot;
|
||||||
|
const DictionaryStructurePolicy *const mStructurePolicy;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
|
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
|
||||||
|
|
|
@ -83,27 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot();
|
const DictionaryStructurePolicy *const structurePolicy =
|
||||||
int pos = BinaryFormat::getTerminalPosition(root, word, length,
|
mBinaryDictionaryInfo.getStructurePolicy();
|
||||||
|
int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_VALID_WORD == pos) {
|
if (NOT_VALID_WORD == pos) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
|
||||||
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
|
|
||||||
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
|
||||||
// having no probability outside of the suggestion process (where it should be used
|
|
||||||
// for shortcuts).
|
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
|
||||||
if (hasMultipleChars) {
|
|
||||||
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
|
||||||
} else {
|
|
||||||
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
|
||||||
}
|
|
||||||
const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
|
|
||||||
return unigramProbability;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
|
bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
||||||
|
#define LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
class DictionaryStructurePolicy;
|
||||||
|
|
||||||
|
// Static-only factory that maps a detected binary dictionary format version to the
// DictionaryStructurePolicy used to read that dictionary.
class DictionaryStructurePolicyFactory {
|
||||||
|
public:
|
||||||
|
// Returns the structure policy for dictionaryFormat. VERSION_1 and VERSION_2 both map to
// the PatriciaTriePolicy instance obtained from PatriciaTriePolicy::getInstance(). Any other
// value hits ASSERT(false) and yields 0.
// NOTE(review): BinaryDictionaryInfo stores this result and its callers dereference
// getStructurePolicy() without a null check, so the 0 return is only safe if unknown formats
// never get this far -- confirm.
static const DictionaryStructurePolicy *getDictionaryStructurePolicy(
|
||||||
|
const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
|
||||||
|
switch (dictionaryFormat) {
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_1:
|
||||||
|
// Fall through
|
||||||
|
case BinaryDictionaryFormatUtils::VERSION_2:
|
||||||
|
return PatriciaTriePolicy::getInstance();
|
||||||
|
default:
|
||||||
|
ASSERT(false);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DictionaryStructurePolicyFactory);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_DICTIONARY_STRUCTURE_POLICY_FACTORY_H
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/patricia_trie_policy.h"
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/binary_dictionary_info.h"
|
||||||
|
#include "suggest/core/dictionary/binary_format.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Out-of-class definition of the static instance declared in PatriciaTriePolicy.
const PatriciaTriePolicy PatriciaTriePolicy::sInstance;
|
||||||
|
|
||||||
|
// Placeholder: the body is currently empty, so none of the arguments are read and
// childDicNodes is left untouched.
void PatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const {
|
||||||
|
// TODO: Move children creating methods from DicNodeUtils.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forwards to BinaryFormat::getWordAtAddress() using the dictionary root taken from
// binaryDictionaryInfo; terminalNodePos, maxDepth, outWord and outUnigramProbability are
// passed through unchanged.
// NOTE(review): presumably the helper fills outWord / outUnigramProbability -- confirm
// against BinaryFormat.
void PatriciaTriePolicy::getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int terminalNodePos, const int maxDepth, int *const outWord,
|
||||||
|
int *const outUnigramProbability) const {
|
||||||
|
BinaryFormat::getWordAtAddress(binaryDictionaryInfo->getDictRoot(), terminalNodePos,
|
||||||
|
maxDepth, outWord, outUnigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forwards to BinaryFormat::getTerminalPosition() using the dictionary root taken from
// binaryDictionaryInfo and returns its result unchanged. Callers compare the returned
// position against NOT_VALID_WORD to detect a word that was not found.
int PatriciaTriePolicy::getTerminalNodePositionOfWord(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
||||||
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
|
return BinaryFormat::getTerminalPosition(binaryDictionaryInfo->getDictRoot(), inWord,
|
||||||
|
length, forceLowerCaseSearch);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the unigram probability stored for the node at nodePos, or NOT_A_PROBABILITY when
// the node's flags mark it as blacklisted or as not a word.
int PatriciaTriePolicy::getUnigramProbability(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int nodePos) const {
|
||||||
|
const uint8_t *const root = binaryDictionaryInfo->getDictRoot();
|
||||||
|
// Work on a local copy: the BinaryFormat helpers below advance the position they are given.
int pos = nodePos;
|
||||||
|
const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
|
if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
|
||||||
|
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
||||||
|
// having no probability outside of the suggestion process (where it should be used
|
||||||
|
// for shortcuts).
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
|
||||||
|
// Step over the node's character data (several code points or a single one) so that pos
// reaches the location the probability is read from.
if (hasMultipleChars) {
|
||||||
|
pos = BinaryFormat::skipOtherCharacters(root, pos);
|
||||||
|
} else {
|
||||||
|
BinaryFormat::getCodePointAndForwardPointer(root, &pos);
|
||||||
|
}
|
||||||
|
return BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
|
#define LATINIME_PATRICIA_TRIE_POLICY_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/core/policy/dictionary_structure_policy.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// DictionaryStructurePolicy implementation exposed through a single shared, const instance.
// Each method takes the BinaryDictionaryInfo to operate on as an argument; the class declares
// no per-dictionary data members.
// NOTE(review): callers reach these methods through a DictionaryStructurePolicy pointer, so
// they presumably override virtuals declared in the base class -- confirm against
// dictionary_structure_policy.h.
class PatriciaTriePolicy : public DictionaryStructurePolicy {
|
||||||
|
public:
|
||||||
|
// Returns the address of the shared static instance.
static AK_FORCE_INLINE const PatriciaTriePolicy *getInstance() {
|
||||||
|
return &sInstance;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Root position handed to DicNode initialization; always 0 for this policy.
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Intended to create the child nodes of dicNode into childDicNodes.
// NOTE(review): confirm the implementation status in patricia_trie_policy.cpp.
void createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const NodeFilter *const nodeFilter, DicNodeVector *const childDicNodes) const;
|
||||||
|
|
||||||
|
// Looks up the word for terminalNodePos; outWord and outUnigramProbability are output
// parameters.
void getWordAtPosition(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int terminalNodePos, const int maxDepth, int *const outWord,
|
||||||
|
int *const outUnigramProbability) const;
|
||||||
|
|
||||||
|
// Returns the terminal node position for inWord; callers compare the result against
// NOT_VALID_WORD.
int getTerminalNodePositionOfWord(
|
||||||
|
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int *const inWord,
|
||||||
|
const int length, const bool forceLowerCaseSearch) const;
|
||||||
|
|
||||||
|
// Returns the unigram probability for the node at nodePos.
int getUnigramProbability(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
|
const int nodePos) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(PatriciaTriePolicy);
|
||||||
|
// The one shared instance returned by getInstance().
static const PatriciaTriePolicy sInstance;
|
||||||
|
|
||||||
|
// Constructor and destructor are private: code outside the class cannot create or destroy
// instances.
PatriciaTriePolicy() {}
|
||||||
|
~PatriciaTriePolicy() {}
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
|
Loading…
Reference in a new issue