Merge "Add a method to iterate ngram entries." into lmp-dev
This commit is contained in:
commit
f40adc97cb
10 changed files with 132 additions and 22 deletions
|
@ -59,32 +59,40 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Creates a listener bound to the given previous-words info, result container and
 * dictionary structure policy.
 *
 * The three pointers are stored as-is; this constructor neither copies nor validates the
 * objects they point to.
 *
 * @param prevWordsInfo information about the preceding word(s) of the prediction.
 * @param suggestionResults container that receives the predictions.
 * @param dictStructurePolicy policy used to read words and compute probabilities.
 */
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
        const PrevWordsInfo *const prevWordsInfo,
        SuggestionResults *const suggestionResults,
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
        : mPrevWordsInfo(prevWordsInfo),
          mSuggestionResults(suggestionResults),
          mDictStructurePolicy(dictStructurePolicy) {
}
|
||||
|
||||
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
|
||||
const int targetPtNodePos) {
|
||||
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||
return;
|
||||
}
|
||||
if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||
&& ngramProbability == NOT_A_PROBABILITY) {
|
||||
return;
|
||||
}
|
||||
int targetWordCodePoints[MAX_WORD_LENGTH];
|
||||
int unigramProbability = 0;
|
||||
const int codePointCount = mDictStructurePolicy->
|
||||
getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
|
||||
MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
|
||||
if (codePointCount <= 0) {
|
||||
return;
|
||||
}
|
||||
const int probability = mDictStructurePolicy->getProbability(
|
||||
unigramProbability, ngramProbability);
|
||||
mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
|
||||
}
|
||||
|
||||
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const {
|
||||
TimeKeeper::setCurrentTime();
|
||||
int unigramProbability = 0;
|
||||
int bigramCodePoints[MAX_WORD_LENGTH];
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
|
||||
mDictionaryStructureWithBufferPolicy.get());
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
||||
continue;
|
||||
}
|
||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||
&& bigramsIt.getProbability() == NOT_A_PROBABILITY) {
|
||||
continue;
|
||||
}
|
||||
const int codePointCount = mDictionaryStructureWithBufferPolicy->
|
||||
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
||||
MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
|
||||
if (codePointCount <= 0) {
|
||||
continue;
|
||||
}
|
||||
const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
|
||||
unigramProbability, bigramsIt.getProbability());
|
||||
outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
|
||||
}
|
||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener);
|
||||
}
|
||||
|
||||
int Dictionary::getProbability(const int *word, int length) const {
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "jni.h"
|
||||
#include "suggest/core/dictionary/ngram_listener.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
|
@ -114,6 +115,21 @@ class Dictionary {
|
|||
|
||||
typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
|
||||
|
||||
class NgramListenerForPrediction : public NgramListener {
|
||||
public:
|
||||
NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const suggestionResults,
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
|
||||
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
|
||||
|
||||
const PrevWordsInfo *const mPrevWordsInfo;
|
||||
SuggestionResults *const mSuggestionResults;
|
||||
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
|
||||
};
|
||||
|
||||
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
|
||||
|
||||
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
||||
|
|
40
native/jni/src/suggest/core/dictionary/ngram_listener.h
Normal file
40
native/jni/src/suggest/core/dictionary/ngram_listener.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (C) 2014, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_NGRAM_LISTENER_H
|
||||
#define LATINIME_NGRAM_LISTENER_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
/**
|
||||
* Interface to iterate ngram entries.
|
||||
*/
|
||||
class NgramListener {
|
||||
public:
|
||||
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos) = 0;
|
||||
virtual ~NgramListener() {};
|
||||
|
||||
protected:
|
||||
NgramListener() {}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(NgramListener);
|
||||
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif /* LATINIME_NGRAM_LISTENER_H */
|
|
@ -30,6 +30,7 @@ class DicNodeVector;
|
|||
class DictionaryBigramsStructurePolicy;
|
||||
class DictionaryHeaderStructurePolicy;
|
||||
class DictionaryShortcutsStructurePolicy;
|
||||
class NgramListener;
|
||||
class PrevWordsInfo;
|
||||
class UnigramProperty;
|
||||
|
||||
|
@ -61,6 +62,9 @@ class DictionaryStructureWithBufferPolicy {
|
|||
virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int nodePos) const = 0;
|
||||
|
||||
virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const = 0;
|
||||
|
||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||
|
||||
virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
|
||||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/ngram_listener.h"
|
||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
|
@ -155,6 +156,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
|
|
|
@ -93,6 +93,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||
#include "suggest/core/dictionary/ngram_listener.h"
|
||||
#include "suggest/core/session/prev_words_info.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||
|
@ -324,6 +325,16 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
}
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
|
|
|
@ -65,6 +65,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
#include "suggest/core/dicnode/dic_node.h"
|
||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||
#include "suggest/core/dictionary/ngram_listener.h"
|
||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||
#include "suggest/core/dictionary/property/word_property.h"
|
||||
|
@ -145,6 +146,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_DICT_POS;
|
||||
|
|
|
@ -74,6 +74,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||
|
|
Loading…
Reference in a new issue