Merge "Create Ver2ParticiaTrieNodeReader."
This commit is contained in:
commit
7c73e0f2a8
8 changed files with 181 additions and 69 deletions
|
@ -57,7 +57,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
dynamic_pt_writing_utils.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
||||
patricia_trie_policy.cpp \
|
||||
patricia_trie_reading_utils.cpp) \
|
||||
patricia_trie_reading_utils.cpp \
|
||||
ver2_patricia_trie_node_reader.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||
ver4_dict_buffers.cpp \
|
||||
ver4_dict_constants.cpp \
|
||||
|
|
|
@ -53,6 +53,21 @@ class PtNodeParams {
|
|||
memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
|
||||
}
|
||||
|
||||
// PtNode read from version 2 dictionary.
|
||||
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||
const int codePointCount, const int *const codePoints, const int probability,
|
||||
const int childrenPos, const int shortcutPos, const int bigramPos,
|
||||
const int siblingPos)
|
||||
: mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
|
||||
mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
||||
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
|
||||
mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
|
||||
mBigramPos(bigramPos), mSiblingPos(siblingPos) {
|
||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
||||
}
|
||||
|
||||
// PtNode with a terminal id.
|
||||
PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
|
||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
||||
|
|
|
@ -336,99 +336,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
|
|||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::isNotAWord(flags)
|
||||
|| PatriciaTrieReadingUtils::isBlacklisted(flags)) {
|
||||
const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
|
||||
if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
|
||||
// If this is not a word, or if it's a blacklisted entry, it should behave as
|
||||
// having no probability outside of the suggestion process (where it should be used
|
||||
// for shortcuts).
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
|
||||
mDictRoot, &pos), NOT_A_PROBABILITY);
|
||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
|
||||
}
|
||||
return pos;
|
||||
return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos();
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
mShortcutListPolicy.skipAllShortcuts(&pos);;
|
||||
}
|
||||
return pos;
|
||||
return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos();
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
|
||||
const int ptNodePos, DicNodeVector *childDicNodes) const {
|
||||
int pos = ptNodePos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
PatriciaTrieReadingUtils::NodeFlags flags;
|
||||
int mergedNodeCodePointCount = 0;
|
||||
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||
const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
|
||||
mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
|
||||
const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
|
||||
: NOT_A_PROBABILITY;
|
||||
const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
|
||||
mDictRoot, flags, &pos) : NOT_A_DICT_POS;
|
||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
getBigramsStructurePolicy()->skipAllBigrams(&pos);
|
||||
}
|
||||
if (mergedNodeCodePointCount <= 0) {
|
||||
AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
|
||||
ASSERT(false);
|
||||
return pos;
|
||||
}
|
||||
int probability = NOT_A_PROBABILITY;
|
||||
int childrenPos = NOT_A_DICT_POS;
|
||||
int shortcutPos = NOT_A_DICT_POS;
|
||||
int bigramPos = NOT_A_DICT_POS;
|
||||
int siblingPos = NOT_A_DICT_POS;
|
||||
PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
|
||||
getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
|
||||
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
|
||||
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
|
||||
PatriciaTrieReadingUtils::isTerminal(flags),
|
||||
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
|
||||
PatriciaTrieReadingUtils::isBlacklisted(flags) ||
|
||||
PatriciaTrieReadingUtils::isNotAWord(flags),
|
||||
PatriciaTrieReadingUtils::isBlacklisted(flags)
|
||||
|| PatriciaTrieReadingUtils::isNotAWord(flags),
|
||||
mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||
return pos;
|
||||
return siblingPos;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
|
@ -40,7 +41,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
|
||||
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
|
||||
- mHeaderPolicy.getSize()),
|
||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
|
||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
return 0;
|
||||
|
@ -143,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
const int mDictBufferSize;
|
||||
const BigramListPolicy mBigramListPolicy;
|
||||
const ShortcutListPolicy mShortcutListPolicy;
|
||||
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
||||
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
|
|||
return base + offset;
|
||||
}
|
||||
|
||||
/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
|
||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
||||
const DictionaryBigramsStructurePolicy *const bigramPolicy,
|
||||
NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
|
||||
int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
|
||||
int *const outBigramPos, int *const outSiblingPos) {
|
||||
int readingPos = ptNodePos;
|
||||
const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
|
||||
*outFlags = flags;
|
||||
*outCodePointCount = getCharsAndAdvancePosition(
|
||||
dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
|
||||
*outProbability = isTerminal(flags) ?
|
||||
readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
|
||||
*outChildrenPos = hasChildrenInFlags(flags) ?
|
||||
readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS;
|
||||
*outShortcutPos = NOT_A_DICT_POS;
|
||||
if (hasShortcutTargets(flags)) {
|
||||
*outShortcutPos = readingPos;
|
||||
shortcutPolicy->skipAllShortcuts(&readingPos);
|
||||
}
|
||||
*outBigramPos = NOT_A_DICT_POS;
|
||||
if (hasBigrams(flags)) {
|
||||
*outBigramPos = readingPos;
|
||||
bigramPolicy->skipAllBigrams(&readingPos);
|
||||
}
|
||||
*outSiblingPos = readingPos;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -23,6 +23,9 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
class DictionaryShortcutsStructurePolicy;
|
||||
class DictionaryBigramsStructurePolicy;
|
||||
|
||||
// TODO: Move to pt_common
|
||||
class PatriciaTrieReadingUtils {
|
||||
public:
|
||||
|
@ -101,6 +104,13 @@ class PatriciaTrieReadingUtils {
|
|||
return nodeFlags;
|
||||
}
|
||||
|
||||
static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
|
||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
||||
const DictionaryBigramsStructurePolicy *const bigramPolicy,
|
||||
NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
|
||||
int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
|
||||
int *const outBigramPos, int *const outSiblingPos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
|
||||
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
// Reads the PtNode at ptNodePos from the dictionary buffer and returns it as PtNodeParams.
// A default-constructed PtNodeParams is returned (after logging and asserting) when ptNodePos
// lies outside [0, mDictSize) or when the node read has no code points.
const PtNodeParams Ver2ParticiaTrieNodeReader::fetchNodeInfoInBufferFromPtNodePos(
        const int ptNodePos) const {
    const bool isInsideBuffer = (ptNodePos >= 0) && (ptNodePos < mDictSize);
    if (!isInsideBuffer) {
        // Reading invalid position because of bug or broken dictionary.
        AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
                ptNodePos, mDictSize);
        ASSERT(false);
        return PtNodeParams();
    }

    // Out parameters for readPtNodeInfo, preset to their "not available" values.
    PatriciaTrieReadingUtils::NodeFlags nodeFlags;
    int codePointCount = 0;
    int codePoints[MAX_WORD_LENGTH];
    int nodeProbability = NOT_A_PROBABILITY;
    int childrenPosition = NOT_A_DICT_POS;
    int shortcutPosition = NOT_A_DICT_POS;
    int bigramPosition = NOT_A_DICT_POS;
    int siblingPosition = NOT_A_DICT_POS;
    PatriciaTrieReadingUtils::readPtNodeInfo(mDictBuffer, ptNodePos, mShortuctPolicy,
            mBigramPolicy, &nodeFlags, &codePointCount, codePoints, &nodeProbability,
            &childrenPosition, &shortcutPosition, &bigramPosition, &siblingPosition);

    if (codePointCount <= 0) {
        AKLOGE("Empty PtNode is not allowed. Code point count: %d", codePointCount);
        ASSERT(false);
        return PtNodeParams();
    }
    return PtNodeParams(ptNodePos, nodeFlags, codePointCount, codePoints, nodeProbability,
            childrenPosition, shortcutPosition, bigramPosition, siblingPosition);
}
|
||||
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
|
||||
#define LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class DictionaryBigramsStructurePolicy;
|
||||
class DictionaryShortcutsStructurePolicy;
|
||||
|
||||
class Ver2ParticiaTrieNodeReader : public PtNodeReader {
|
||||
public:
|
||||
Ver2ParticiaTrieNodeReader(const uint8_t *const dictBuffer, const int dictSize,
|
||||
const DictionaryBigramsStructurePolicy *const bigramPolicy,
|
||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
|
||||
: mDictBuffer(dictBuffer), mDictSize(dictSize), mBigramPolicy(bigramPolicy),
|
||||
mShortuctPolicy(shortcutPolicy) {}
|
||||
|
||||
virtual const PtNodeParams fetchNodeInfoInBufferFromPtNodePos(const int ptNodePos) const;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver2ParticiaTrieNodeReader);
|
||||
|
||||
const uint8_t *const mDictBuffer;
|
||||
const int mDictSize;
|
||||
const DictionaryBigramsStructurePolicy *const mBigramPolicy;
|
||||
const DictionaryShortcutsStructurePolicy *const mShortuctPolicy;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_VER2_PATRICIA_TRIE_NODE_READER_H */
|
Loading…
Reference in a new issue