Merge "Create Ver2PtNodeArrayReader."
This commit is contained in:
commit
fd9599e9c7
5 changed files with 108 additions and 85 deletions
|
@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \
|
||||
patricia_trie_policy.cpp \
|
||||
patricia_trie_reading_utils.cpp \
|
||||
ver2_patricia_trie_node_reader.cpp) \
|
||||
ver2_patricia_trie_node_reader.cpp \
|
||||
ver2_pt_node_array_reader.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \
|
||||
ver4_dict_buffers.cpp \
|
||||
ver4_dict_constants.cpp \
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "defines.h"
|
||||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||
|
||||
|
@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
|
|||
// dictionary. If no match is found, it returns NOT_A_DICT_POS.
|
||||
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||
const int length, const bool forceLowerCaseSearch) const {
|
||||
int pos = getRootPosition();
|
||||
int wordPos = 0;
|
||||
|
||||
while (true) {
|
||||
// If we already traversed the tree further than the word is long, there means
|
||||
// there was no match (or we would have found it).
|
||||
if (wordPos >= length) return NOT_A_DICT_POS;
|
||||
int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
|
||||
&pos);
|
||||
const int wChar = forceLowerCaseSearch
|
||||
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
|
||||
while (true) {
|
||||
// If there are no more PtNodes in this array, it means we could not
|
||||
// find a matching character for this depth, therefore there is no match.
|
||||
if (0 >= ptNodeCount) return NOT_A_DICT_POS;
|
||||
const int ptNodePos = pos;
|
||||
const PatriciaTrieReadingUtils::NodeFlags flags =
|
||||
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
|
||||
int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
|
||||
&pos);
|
||||
if (character == wChar) {
|
||||
// This is the correct PtNode. Only one PtNode may start with the same char within
|
||||
// a PtNode array, so either we found our match in this array, or there is
|
||||
// no match and we can return NOT_A_DICT_POS. So we will check all the
|
||||
// characters in this PtNode indeed does match.
|
||||
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
|
||||
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
|
||||
&pos);
|
||||
while (NOT_A_CODE_POINT != character) {
|
||||
++wordPos;
|
||||
// If we shoot the length of the word we search for, or if we find a single
|
||||
// character that does not match, as explained above, it means the word is
|
||||
// not in the dictionary (by virtue of this PtNode being the only one to
|
||||
// match the word on the first character, but not matching the whole word).
|
||||
if (wordPos >= length) return NOT_A_DICT_POS;
|
||||
if (inWord[wordPos] != character) return NOT_A_DICT_POS;
|
||||
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
|
||||
mDictRoot, &pos);
|
||||
}
|
||||
}
|
||||
// If we come here we know that so far, we do match. Either we are on a terminal
|
||||
// and we match the length, in which case we found it, or we traverse children.
|
||||
// If we don't match the length AND don't have children, then a word in the
|
||||
// dictionary fully matches a prefix of the searched word but not the full word.
|
||||
++wordPos;
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
if (wordPos == length) {
|
||||
return ptNodePos;
|
||||
}
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||
}
|
||||
if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
// We have children and we are still shorter than the word we are searching for, so
|
||||
// we need to traverse children. Put the pointer on the children position, and
|
||||
// break
|
||||
pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
|
||||
flags, &pos);
|
||||
break;
|
||||
} else {
|
||||
// This PtNode does not match, so skip the remaining part and go to the next.
|
||||
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
|
||||
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
|
||||
&pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
|
||||
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
|
||||
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
|
||||
flags, &pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
|
||||
mShortcutListPolicy.skipAllShortcuts(&pos);
|
||||
}
|
||||
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
|
||||
mBigramListPolicy.skipAllBigrams(&pos);
|
||||
}
|
||||
}
|
||||
--ptNodeCount;
|
||||
}
|
||||
}
|
||||
DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
|
||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
|
@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
|
||||
- mHeaderPolicy.getSize()),
|
||||
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
|
||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}
|
||||
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
|
||||
mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
|
||||
|
||||
AK_FORCE_INLINE int getRootPosition() const {
|
||||
return 0;
|
||||
|
@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
const BigramListPolicy mBigramListPolicy;
|
||||
const ShortcutListPolicy mShortcutListPolicy;
|
||||
const Ver2ParticiaTrieNodeReader mPtNodeReader;
|
||||
const Ver2PtNodeArrayReader mPtNodeArrayReader;
|
||||
|
||||
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
|
||||
DicNodeVector *const childDicNodes) const;
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
|
||||
if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) {
|
||||
// Reading invalid position because of a bug or a broken dictionary.
|
||||
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
|
||||
ptNodeArrayPos, mDictSize);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
int readingPos = ptNodeArrayPos;
|
||||
const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
|
||||
mDictBuffer, &readingPos);
|
||||
*outPtNodeCount = ptNodeCountInArray;
|
||||
*outFirstPtNodePos = readingPos;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||
int *const outNextPtNodeArrayPos) const {
|
||||
if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
|
||||
// Reading invalid position because of bug or broken dictionary.
|
||||
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
|
||||
forwordLinkPos, mDictSize);
|
||||
ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
// Ver2 dicts don't have forward links.
|
||||
*outNextPtNodeArrayPos = NOT_A_DICT_POS;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
|
||||
#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class Ver2PtNodeArrayReader : public PtNodeArrayReader {
|
||||
public:
|
||||
Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
|
||||
: mDictBuffer(dictBuffer), mDictSize(dictSize) {};
|
||||
|
||||
virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
|
||||
int *const outPtNodeCount, int *const outFirstPtNodePos) const;
|
||||
virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
|
||||
int *const outNextPtNodeArrayPos) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
|
||||
|
||||
const uint8_t *const mDictBuffer;
|
||||
const int mDictSize;
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */
|
Loading…
Reference in a new issue