From 2d57b3339ad5b4bbf0939858c36c7daf5e38a4cb Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 30 Jul 2014 10:51:24 +0900 Subject: [PATCH] Add a method to iterate ngram entries. Bug: 14425059 Change-Id: I9962c546504288f6c22b1a7368f775decd229c62 --- .../suggest/core/dictionary/dictionary.cpp | 52 +++++++++++-------- .../src/suggest/core/dictionary/dictionary.h | 16 ++++++ .../suggest/core/dictionary/ngram_listener.h | 40 ++++++++++++++ .../dictionary_structure_with_buffer_policy.h | 4 ++ .../v402/ver4_patricia_trie_policy.cpp | 11 ++++ .../backward/v402/ver4_patricia_trie_policy.h | 3 ++ .../structure/v2/patricia_trie_policy.cpp | 11 ++++ .../structure/v2/patricia_trie_policy.h | 3 ++ .../v4/ver4_patricia_trie_policy.cpp | 11 ++++ .../structure/v4/ver4_patricia_trie_policy.h | 3 ++ 10 files changed, 132 insertions(+), 22 deletions(-) create mode 100644 native/jni/src/suggest/core/dictionary/ngram_listener.h diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index bf917d69c..92f5c1713 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -59,32 +59,40 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } } +Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( + const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults, + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) + : mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults), + mDictStructurePolicy(dictStructurePolicy) {} + +void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, + const int targetPtNodePos) { + if (targetPtNodePos == NOT_A_DICT_POS) { + return; + } + if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) + && ngramProbability == NOT_A_PROBABILITY) { + return; + } + int targetWordCodePoints[MAX_WORD_LENGTH]; + int unigramProbability = 0; + const int codePointCount = mDictStructurePolicy-> + getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos, + MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability); + if (codePointCount <= 0) { + return; + } + const int probability = mDictStructurePolicy->getProbability( + unigramProbability, ngramProbability); + mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability); +} + void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - int unigramProbability = 0; - int bigramCodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults, mDictionaryStructureWithBufferPolicy.get()); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */) - && bigramsIt.getProbability() == NOT_A_PROBABILITY) { - continue; - } - const int codePointCount = mDictionaryStructureWithBufferPolicy-> - getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), - MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability); - if (codePointCount <= 0) { - continue; - } - const int probability = mDictionaryStructureWithBufferPolicy->getProbability( - unigramProbability, bigramsIt.getProbability()); - outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability); - } + mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener); } int Dictionary::getProbability(const int *word, int length) const { diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 3b41088fe..732d3b199 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -21,6 +21,7 @@ #include "defines.h" #include "jni.h" +#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" @@ -114,6 +115,21 @@ class Dictionary { typedef std::unique_ptr SuggestInterfacePtr; + class NgramListenerForPrediction : public NgramListener { + public: + NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo, + SuggestionResults *const suggestionResults, + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy); + virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction); + + const PrevWordsInfo *const mPrevWordsInfo; + SuggestionResults *const mSuggestionResults; + const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy; + }; + static const int HEADER_ATTRIBUTE_BUFFER_SIZE; const DictionaryStructureWithBufferPolicy::StructurePolicyPtr diff --git a/native/jni/src/suggest/core/dictionary/ngram_listener.h b/native/jni/src/suggest/core/dictionary/ngram_listener.h new file mode 100644 index 000000000..88b88bafb --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/ngram_listener.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2014, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_NGRAM_LISTENER_H +#define LATINIME_NGRAM_LISTENER_H + +#include "defines.h" + +namespace latinime { + +/** + * Interface to iterate ngram entries. + */ +class NgramListener { + public: + virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos) = 0; + virtual ~NgramListener() {}; + + protected: + NgramListener() {} + + private: + DISALLOW_COPY_AND_ASSIGN(NgramListener); + +}; +} // namespace latinime +#endif /* LATINIME_NGRAM_LISTENER_H */ diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 7ad20e782..81e38f78e 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -30,6 +30,7 @@ class DicNodeVector; class DictionaryBigramsStructurePolicy; class DictionaryHeaderStructurePolicy; class DictionaryShortcutsStructurePolicy; +class NgramListener; class PrevWordsInfo; class UnigramProperty; @@ -61,6 +62,9 @@ class DictionaryStructureWithBufferPolicy { virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int nodePos) const = 0; + virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const = 0; + virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 327741065..4b834a09d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -28,6 +28,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" @@ -155,6 +156,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } +void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const { + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); + } +} + int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index c80a73af7..e61c060e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -93,6 +93,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const; + void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const; + int getShortcutPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index b909e8268..6f02ff363 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" +#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/session/prev_words_info.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" @@ -324,6 +325,16 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } +void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const { + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); + } +} + int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 1dd5705be..a3b22206c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -65,6 +65,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const; + void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const; + int getShortcutPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index cada3d1f7..23bbbbde5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -20,6 +20,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/word_property.h" @@ -145,6 +146,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } +void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const { + BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction( + this /* dictStructurePolicy */); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); + } +} + int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { if (ptNodePos == NOT_A_DICT_POS) { return NOT_A_DICT_POS; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index b0f16cd01..18384546f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -74,6 +74,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const; + void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo, + NgramListener *const listener) const; + int getShortcutPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;