Merge "Create Ver2PtNodeArrayReader."
This commit is contained in:
commit
fd9599e9c7
5 changed files with 108 additions and 85 deletions
|
@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
||||||
patricia_trie_policy.cpp \
|
patricia_trie_policy.cpp \
|
||||||
patricia_trie_reading_utils.cpp \
|
patricia_trie_reading_utils.cpp \
|
||||||
ver2_patricia_trie_node_reader.cpp) \
|
ver2_patricia_trie_node_reader.cpp \
|
||||||
|
ver2_pt_node_array_reader.cpp) \
|
||||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||||
ver4_dict_buffers.cpp \
|
ver4_dict_buffers.cpp \
|
||||||
ver4_dict_constants.cpp \
|
ver4_dict_constants.cpp \
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
|
@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
||||||
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) const {
|
const int length, const bool forceLowerCaseSearch) const {
|
||||||
int pos = getRootPosition();
|
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
||||||
int wordPos = 0;
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
|
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||||
while (true) {
|
|
||||||
// If we already traversed the tree further than the word is long, it means
|
|
||||||
// there was no match (or we would have found it).
|
|
||||||
if (wordPos >= length) return NOT_A_DICT_POS;
|
|
||||||
int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
|
|
||||||
&pos);
|
|
||||||
const int wChar = forceLowerCaseSearch
|
|
||||||
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
|
||||||
while (true) {
|
|
||||||
// If there are no more PtNodes in this array, it means we could not
|
|
||||||
// find a matching character for this depth, therefore there is no match.
|
|
||||||
if (0 >= ptNodeCount) return NOT_A_DICT_POS;
|
|
||||||
const int ptNodePos = pos;
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
|
||||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
|
||||||
int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
|
|
||||||
&pos);
|
|
||||||
if (character == wChar) {
|
|
||||||
// This is the correct PtNode. Only one PtNode may start with the same char within
|
|
||||||
// a PtNode array, so either we found our match in this array, or there is
|
|
||||||
// no match and we can return NOT_A_DICT_POS. So we will check all the
|
|
||||||
// characters in this PtNode do indeed match.
|
|
||||||
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
|
|
||||||
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
|
|
||||||
&pos);
|
|
||||||
while (NOT_A_CODE_POINT != character) {
|
|
||||||
++wordPos;
|
|
||||||
// If we shoot the length of the word we search for, or if we find a single
|
|
||||||
// character that does not match, as explained above, it means the word is
|
|
||||||
// not in the dictionary (by virtue of this PtNode being the only one to
|
|
||||||
// match the word on the first character, but not matching the whole word).
|
|
||||||
if (wordPos >= length) return NOT_A_DICT_POS;
|
|
||||||
if (inWord[wordPos] != character) return NOT_A_DICT_POS;
|
|
||||||
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
|
|
||||||
mDictRoot, &pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If we come here we know that so far, we do match. Either we are on a terminal
|
|
||||||
// and we match the length, in which case we found it, or we traverse children.
|
|
||||||
// If we don't match the length AND don't have children, then a word in the
|
|
||||||
// dictionary fully matches a prefix of the searched word but not the full word.
|
|
||||||
++wordPos;
|
|
||||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
|
||||||
if (wordPos == length) {
|
|
||||||
return ptNodePos;
|
|
||||||
}
|
|
||||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
|
||||||
}
|
|
||||||
if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
// We have children and we are still shorter than the word we are searching for, so
|
|
||||||
// we need to traverse children. Put the pointer on the children position, and
|
|
||||||
// break
|
|
||||||
pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
|
|
||||||
flags, &pos);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
// This PtNode does not match, so skip the remaining part and go to the next.
|
|
||||||
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
|
|
||||||
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
|
|
||||||
&pos);
|
|
||||||
}
|
|
||||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
|
||||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
|
||||||
}
|
|
||||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
|
||||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
|
|
||||||
flags, &pos);
|
|
||||||
}
|
|
||||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
|
||||||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
|
||||||
}
|
|
||||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
|
||||||
mBigramListPolicy.skipAllBigrams(&pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--ptNodeCount;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
|
||||||
|
@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
|
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
|
||||||
- mHeaderPolicy.getSize()),
|
- mHeaderPolicy.getSize()),
|
||||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
|
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||||
|
mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE int getRootPosition() const {
|
AK_FORCE_INLINE int getRootPosition() const {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
const BigramListPolicy mBigramListPolicy;
|
const BigramListPolicy mBigramListPolicy;
|
||||||
const ShortcutListPolicy mShortcutListPolicy;
|
const ShortcutListPolicy mShortcutListPolicy;
|
||||||
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
||||||
|
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
||||||
|
|
||||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||||
DicNodeVector *const childDicNodes) const;
|
DicNodeVector *const childDicNodes) const;
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// Reads the size field of the PtNode array located at ptNodeArrayPos.
//
// ptNodeArrayPos:    position of the PtNode array inside mDictBuffer.
// outPtNodeCount:    receives the value returned by
//                    PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition().
// outFirstPtNodePos: receives the reading position after that helper call.
//
// Returns true after writing both out-parameters. Returns false, after logging an error and
// hitting ASSERT(false), when ptNodeArrayPos is outside [0, mDictSize); in that case neither
// out-parameter is written.
bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||||
|
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
|
||||||
|
// Reject positions that do not fall inside the dictionary buffer.
if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) {
|
||||||
|
// Reading invalid position because of a bug or a broken dictionary.
|
||||||
|
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
|
||||||
|
ptNodeArrayPos, mDictSize);
|
||||||
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Local copy of the position: the helper below takes a pointer to it and, going by its name,
// advances it past the array-size field.
int readingPos = ptNodeArrayPos;
|
||||||
|
// NOTE(review): only the start position is bounds-checked above. Whether reading the size
// field itself can run past mDictSize depends on the helper, which is not visible here.
const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||||
|
mDictBuffer, &readingPos);
|
||||||
|
*outPtNodeCount = ptNodeCountInArray;
|
||||||
|
*outFirstPtNodePos = readingPos;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports the forward link for the given position. Version 2 dictionaries have no forward
// links, so for any in-range position this always yields NOT_A_DICT_POS.
//
// forwordLinkPos (sic, spelling matches the declaration): position to validate; nothing is
//                        read from the buffer at this position.
// outNextPtNodeArrayPos: set to NOT_A_DICT_POS on success.
//
// Returns true when forwordLinkPos lies in [0, mDictSize). Otherwise logs an error, hits
// ASSERT(false) and returns false without writing outNextPtNodeArrayPos.
bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||||
|
int *const outNextPtNodeArrayPos) const {
|
||||||
|
if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
|
||||||
|
// Reading invalid position because of a bug or a broken dictionary.
|
||||||
|
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
|
||||||
|
forwordLinkPos, mDictSize);
|
||||||
|
ASSERT(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Ver2 dicts don't have forward links.
|
||||||
|
*outNextPtNodeArrayPos = NOT_A_DICT_POS;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
|
||||||
|
#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// PtNodeArrayReader implementation for version 2 dictionaries.
//
// Stores a pointer to the dictionary buffer together with its size and uses the size to
// bounds-check positions passed to the read methods. The class declares no destructor, so it
// never releases the buffer; presumably the creator must keep the buffer alive for as long as
// the reader is used -- confirm at the construction site.
class Ver2PtNodeArrayReader : public PtNodeArrayReader {
|
||||||
|
public:
|
||||||
|
// dictBuffer: start of the dictionary data that positions are relative to.
// dictSize:   upper bound (exclusive) for valid positions; presumably in bytes, given the
//             uint8_t buffer -- confirm against the caller.
// NOTE(review): the ';' after the empty constructor body is redundant.
Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
|
||||||
|
: mDictBuffer(dictBuffer), mDictSize(dictSize) {};
|
||||||
|
|
||||||
|
// Reads the PtNode count and first PtNode position of the array at ptNodeArrayPos.
// Declared to return bool; per the name, true means the read was valid.
virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||||
|
int *const outPtNodeCount, int *const outFirstPtNodePos) const;
|
||||||
|
// Reads the forward link at forwordLinkPos (sic) into outNextPtNodeArrayPos.
// Declared to return bool; per the name, true means the read was valid.
virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||||
|
int *const outNextPtNodeArrayPos) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Project macro (not defined in this file); by its name it disables copying and assignment.
DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
|
||||||
|
|
||||||
|
// Dictionary data handed in at construction; never reassigned (const pointer to const data).
const uint8_t *const mDictBuffer;
|
||||||
|
// Size used as the exclusive upper bound when validating positions.
const int mDictSize;
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */
|
Loading…
Reference in a new issue