Merge "Create Ver2PtNodeArrayReader."

This commit is contained in:
Keisuke Kuroyanagi 2014-02-10 12:47:44 +00:00 committed by Android (Google) Code Review
commit fd9599e9c7
5 changed files with 108 additions and 85 deletions

View file

@ -58,7 +58,8 @@ LATIN_IME_CORE_SRC_FILES := \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \ patricia_trie_policy.cpp \
patricia_trie_reading_utils.cpp \ patricia_trie_reading_utils.cpp \
ver2_patricia_trie_node_reader.cpp) \ ver2_patricia_trie_node_reader.cpp \
ver2_pt_node_array_reader.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v4/, \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
ver4_dict_buffers.cpp \ ver4_dict_buffers.cpp \
ver4_dict_constants.cpp \ ver4_dict_constants.cpp \

View file

@ -20,6 +20,7 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@ -235,89 +236,9 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
// dictionary. If no match is found, it returns NOT_A_DICT_POS. // dictionary. If no match is found, it returns NOT_A_DICT_POS.
int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) const { const int length, const bool forceLowerCaseSearch) const {
int pos = getRootPosition(); DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
int wordPos = 0; readingHelper.initWithPtNodeArrayPos(getRootPosition());
return readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
while (true) {
// If we already traversed the tree further than the word is long, there means
// there was no match (or we would have found it).
if (wordPos >= length) return NOT_A_DICT_POS;
int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(mDictRoot,
&pos);
const int wChar = forceLowerCaseSearch
? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos];
while (true) {
// If there are no more PtNodes in this array, it means we could not
// find a matching character for this depth, therefore there is no match.
if (0 >= ptNodeCount) return NOT_A_DICT_POS;
const int ptNodePos = pos;
const PatriciaTrieReadingUtils::NodeFlags flags =
PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
&pos);
if (character == wChar) {
// This is the correct PtNode. Only one PtNode may start with the same char within
// a PtNode array, so either we found our match in this array, or there is
// no match and we can return NOT_A_DICT_POS. So we will check all the
// characters in this PtNode indeed does match.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(mDictRoot,
&pos);
while (NOT_A_CODE_POINT != character) {
++wordPos;
// If we shoot the length of the word we search for, or if we find a single
// character that does not match, as explained above, it means the word is
// not in the dictionary (by virtue of this PtNode being the only one to
// match the word on the first character, but not matching the whole word).
if (wordPos >= length) return NOT_A_DICT_POS;
if (inWord[wordPos] != character) return NOT_A_DICT_POS;
character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
mDictRoot, &pos);
}
}
// If we come here we know that so far, we do match. Either we are on a terminal
// and we match the length, in which case we found it, or we traverse children.
// If we don't match the length AND don't have children, then a word in the
// dictionary fully matches a prefix of the searched word but not the full word.
++wordPos;
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
if (wordPos == length) {
return ptNodePos;
}
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (!PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
return NOT_A_DICT_POS;
}
// We have children and we are still shorter than the word we are searching for, so
// we need to traverse children. Put the pointer on the children position, and
// break
pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
flags, &pos);
break;
} else {
// This PtNode does not match, so skip the remaining part and go to the next.
if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH,
&pos);
}
if (PatriciaTrieReadingUtils::isTerminal(flags)) {
PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
}
if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot,
flags, &pos);
}
if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
mShortcutListPolicy.skipAllShortcuts(&pos);
}
if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
mBigramListPolicy.skipAllBigrams(&pos);
}
}
--ptNodeCount;
}
}
} }
int PatriciaTriePolicy::getProbability(const int unigramProbability, int PatriciaTriePolicy::getProbability(const int unigramProbability,

View file

@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -42,7 +43,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
mDictBufferSize(mMmappedBuffer.get()->getBufferSize() mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
- mHeaderPolicy.getSize()), - mHeaderPolicy.getSize()),
mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {} mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
mPtNodeArrayReader(mDictRoot, mDictBufferSize) {}
AK_FORCE_INLINE int getRootPosition() const { AK_FORCE_INLINE int getRootPosition() const {
return 0; return 0;
@ -146,6 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
const BigramListPolicy mBigramListPolicy; const BigramListPolicy mBigramListPolicy;
const ShortcutListPolicy mShortcutListPolicy; const ShortcutListPolicy mShortcutListPolicy;
const Ver2ParticiaTrieNodeReader mPtNodeReader; const Ver2ParticiaTrieNodeReader mPtNodeReader;
const Ver2PtNodeArrayReader mPtNodeArrayReader;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const; DicNodeVector *const childDicNodes) const;

View file

@ -0,0 +1,54 @@
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"
namespace latinime {
bool Ver2PtNodeArrayReader::readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
int *const outPtNodeCount, int *const outFirstPtNodePos) const {
if (ptNodeArrayPos < 0 || ptNodeArrayPos >= mDictSize) {
// Reading invalid position because of a bug or a broken dictionary.
AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
ptNodeArrayPos, mDictSize);
ASSERT(false);
return false;
}
int readingPos = ptNodeArrayPos;
const int ptNodeCountInArray = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
mDictBuffer, &readingPos);
*outPtNodeCount = ptNodeCountInArray;
*outFirstPtNodePos = readingPos;
return true;
}
bool Ver2PtNodeArrayReader::readForwardLinkAndReturnIfValid(const int forwordLinkPos,
int *const outNextPtNodeArrayPos) const {
if (forwordLinkPos < 0 || forwordLinkPos >= mDictSize) {
// Reading invalid position because of bug or broken dictionary.
AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
forwordLinkPos, mDictSize);
ASSERT(false);
return false;
}
// Ver2 dicts don't have forward links.
*outNextPtNodeArrayPos = NOT_A_DICT_POS;
return true;
}
} // namespace latinime

View file

@ -0,0 +1,44 @@
/*
* Copyright (C) 2014, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_VER2_PT_NODE_ARRAY_READER_H
#define LATINIME_VER2_PT_NODE_ARRAY_READER_H
#include <stdint.h>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
namespace latinime {
class Ver2PtNodeArrayReader : public PtNodeArrayReader {
public:
Ver2PtNodeArrayReader(const uint8_t *const dictBuffer, const int dictSize)
: mDictBuffer(dictBuffer), mDictSize(dictSize) {};
virtual bool readPtNodeArrayInfoAndReturnIfValid(const int ptNodeArrayPos,
int *const outPtNodeCount, int *const outFirstPtNodePos) const;
virtual bool readForwardLinkAndReturnIfValid(const int forwordLinkPos,
int *const outNextPtNodeArrayPos) const;
private:
DISALLOW_COPY_AND_ASSIGN(Ver2PtNodeArrayReader);
const uint8_t *const mDictBuffer;
const int mDictSize;
};
} // namespace latinime
#endif /* LATINIME_VER2_PT_NODE_ARRAY_READER_H */