am f40adc97: Merge "Add a method to iterate ngram entries." into lmp-dev
* commit 'f40adc97cbe309bafc94492b615daa529752fabc': Add a method to iterate ngram entries.
This commit is contained in:
commit
f3f81d0e0e
10 changed files with 132 additions and 22 deletions
|
@ -59,32 +59,40 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Constructs a listener that only stores the three pointers it is given:
// the previous-words info, the suggestion results to add predictions to, and
// the dictionary structure policy used to resolve visited entries.
// The constructor body is empty; nothing is read through the pointers here.
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
|
||||||
|
const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults,
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
|
||||||
|
: mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults),
|
||||||
|
mDictStructurePolicy(dictStructurePolicy) {}
|
||||||
|
|
||||||
|
// Handles one visited ngram entry and, if it is usable, adds it to the
// suggestion results as a prediction.
//
// ngramProbability: probability value reported for the ngram entry.
// targetPtNodePos:  position of the entry's target PtNode.
//
// The entry is silently ignored when:
//  - targetPtNodePos is NOT_A_DICT_POS,
//  - isNthPrevWordBeginningOfSentence(1) is true and ngramProbability is
//    NOT_A_PROBABILITY, or
//  - the policy returns no code points (count <= 0) for the target position.
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
|
||||||
|
const int targetPtNodePos) {
|
||||||
|
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||||
|
&& ngramProbability == NOT_A_PROBABILITY) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Buffer for the target word; at most MAX_WORD_LENGTH code points are requested below.
int targetWordCodePoints[MAX_WORD_LENGTH];
|
||||||
|
int unigramProbability = 0;
|
||||||
|
// Fills targetWordCodePoints and unigramProbability for the target position.
const int codePointCount = mDictStructurePolicy->
|
||||||
|
getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
|
||||||
|
MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
|
||||||
|
if (codePointCount <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Let the policy combine the unigram and ngram probabilities into the value to report.
const int probability = mDictStructurePolicy->getProbability(
|
||||||
|
unigramProbability, ngramProbability);
|
||||||
|
mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
|
||||||
|
}
|
||||||
|
|
||||||
// Adds predictions for the given previous-words info to outSuggestionResults.
//
// NOTE(review): this span is a side-by-side diff rendering. Where a line
// appears twice, the first copy is the pre-change column and the second is the
// post-change column. The pre-change code walks a BinaryDictionaryBigramsIterator
// inline and calls addPrediction() itself. The post-change code builds an
// NgramListenerForPrediction and hands it to iterateNgramEntries() on the
// dictionary structure policy.
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
SuggestionResults *const outSuggestionResults) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
int unigramProbability = 0;
|
NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
|
||||||
int bigramCodePoints[MAX_WORD_LENGTH];
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
|
||||||
mDictionaryStructureWithBufferPolicy.get());
|
mDictionaryStructureWithBufferPolicy.get());
|
||||||
while (bigramsIt.hasNext()) {
|
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener);
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
|
||||||
&& bigramsIt.getProbability() == NOT_A_PROBABILITY) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int codePointCount = mDictionaryStructureWithBufferPolicy->
|
|
||||||
getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
|
|
||||||
MAX_WORD_LENGTH, bigramCodePoints, &unigramProbability);
|
|
||||||
if (codePointCount <= 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const int probability = mDictionaryStructureWithBufferPolicy->getProbability(
|
|
||||||
unigramProbability, bigramsIt.getProbability());
|
|
||||||
outSuggestionResults->addPrediction(bigramCodePoints, codePointCount, probability);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "jni.h"
|
#include "jni.h"
|
||||||
|
#include "suggest/core/dictionary/ngram_listener.h"
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
#include "suggest/core/policy/dictionary_header_structure_policy.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
@ -114,6 +115,21 @@ class Dictionary {
|
||||||
|
|
||||||
typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
|
typedef std::unique_ptr<SuggestInterface> SuggestInterfacePtr;
|
||||||
|
|
||||||
|
// NgramListener implementation declared inside Dictionary for use when
// producing predictions. It holds pointers to the previous-words info, the
// suggestion results and the dictionary structure policy, and overrides
// onVisitEntry() to process each visited ngram entry.
class NgramListenerForPrediction : public NgramListener {
|
||||||
|
public:
|
||||||
|
// Stores the three pointers; all of them are kept as const pointer members.
NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
SuggestionResults *const suggestionResults,
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
|
||||||
|
// Overrides NgramListener::onVisitEntry; called once per visited ngram entry.
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
|
||||||
|
|
||||||
|
// Previous-words context the listener was created for.
const PrevWordsInfo *const mPrevWordsInfo;
|
||||||
|
// Destination for the predictions (pointee is non-const).
SuggestionResults *const mSuggestionResults;
|
||||||
|
// Policy used to look up the target word of an entry.
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
|
||||||
|
};
|
||||||
|
|
||||||
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
|
static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
|
||||||
|
|
||||||
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
const DictionaryStructureWithBufferPolicy::StructurePolicyPtr
|
||||||
|
|
40
native/jni/src/suggest/core/dictionary/ngram_listener.h
Normal file
40
native/jni/src/suggest/core/dictionary/ngram_listener.h
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2014, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_NGRAM_LISTENER_H
|
||||||
|
#define LATINIME_NGRAM_LISTENER_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface to iterate ngram entries.
|
||||||
|
*/
|
||||||
|
// Abstract callback interface: implementers receive one onVisitEntry() call
// per ngram entry. The constructor is protected, so the class can only be
// instantiated through a subclass.
class NgramListener {
|
||||||
|
public:
|
||||||
|
// Called for a visited ngram entry with its probability value and the
// position of its target PtNode. Must be implemented by subclasses.
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos) = 0;
|
||||||
|
// Virtual destructor so that deleting through a NgramListener pointer is well-defined.
virtual ~NgramListener() {};
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// Protected: only subclasses may construct a listener.
NgramListener() {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(NgramListener);
|
||||||
|
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_NGRAM_LISTENER_H */
|
|
@ -30,6 +30,7 @@ class DicNodeVector;
|
||||||
class DictionaryBigramsStructurePolicy;
|
class DictionaryBigramsStructurePolicy;
|
||||||
class DictionaryHeaderStructurePolicy;
|
class DictionaryHeaderStructurePolicy;
|
||||||
class DictionaryShortcutsStructurePolicy;
|
class DictionaryShortcutsStructurePolicy;
|
||||||
|
class NgramListener;
|
||||||
class PrevWordsInfo;
|
class PrevWordsInfo;
|
||||||
class UnigramProperty;
|
class UnigramProperty;
|
||||||
|
|
||||||
|
@ -61,6 +62,9 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int nodePos) const = 0;
|
const int nodePos) const = 0;
|
||||||
|
|
||||||
|
// Pure virtual: implementations iterate ngram entries for prevWordsInfo and
// report them through the given listener. The method is const on the policy;
// the listener pointee is non-const.
virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const = 0;
|
||||||
|
|
||||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
||||||
virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
|
virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/ngram_listener.h"
|
||||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
@ -155,6 +156,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walks the bigram entries used for prediction and reports each one to the listener.
//
// The iterator is obtained from prevWordsInfo, with this policy passed as the
// dictionary structure policy. Every entry is forwarded as
// (probability, bigram position) without any filtering in this function.
// Both prevWordsInfo and listener are dereferenced without a null check.
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const {
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||||
|
this /* dictStructurePolicy */);
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
// Advance first; the getters below are read after next() has been called.
bigramsIt.next();
|
||||||
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
|
|
@ -93,6 +93,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int ptNodePos) const;
|
const int ptNodePos) const;
|
||||||
|
|
||||||
|
// Implementation of DictionaryStructureWithBufferPolicy::iterateNgramEntries
// for this policy: reports ngram entries for prevWordsInfo to the listener.
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||||
|
#include "suggest/core/dictionary/ngram_listener.h"
|
||||||
#include "suggest/core/session/prev_words_info.h"
|
#include "suggest/core/session/prev_words_info.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
|
||||||
|
@ -324,6 +325,16 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walks the bigram entries used for prediction and reports each one to the listener.
//
// The iterator is obtained from prevWordsInfo, with this policy passed as the
// dictionary structure policy. Every entry is forwarded as
// (probability, bigram position) without any filtering in this function.
// Both prevWordsInfo and listener are dereferenced without a null check.
void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const {
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||||
|
this /* dictStructurePolicy */);
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
// Advance first; the getters below are read after next() has been called.
bigramsIt.next();
|
||||||
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
|
|
@ -65,6 +65,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||||
|
|
||||||
|
// Implementation of DictionaryStructureWithBufferPolicy::iterateNgramEntries
// for this policy: reports ngram entries for prevWordsInfo to the listener.
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
#include "suggest/core/dicnode/dic_node_vector.h"
|
#include "suggest/core/dicnode/dic_node_vector.h"
|
||||||
|
#include "suggest/core/dictionary/ngram_listener.h"
|
||||||
#include "suggest/core/dictionary/property/bigram_property.h"
|
#include "suggest/core/dictionary/property/bigram_property.h"
|
||||||
#include "suggest/core/dictionary/property/unigram_property.h"
|
#include "suggest/core/dictionary/property/unigram_property.h"
|
||||||
#include "suggest/core/dictionary/property/word_property.h"
|
#include "suggest/core/dictionary/property/word_property.h"
|
||||||
|
@ -145,6 +146,16 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walks the bigram entries used for prediction and reports each one to the listener.
//
// The iterator is obtained from prevWordsInfo, with this policy passed as the
// dictionary structure policy. Every entry is forwarded as
// (probability, bigram position) without any filtering in this function.
// Both prevWordsInfo and listener are dereferenced without a null check.
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const {
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||||
|
this /* dictStructurePolicy */);
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
// Advance first; the getters below are read after next() has been called.
bigramsIt.next();
|
||||||
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_DICT_POS;
|
return NOT_A_DICT_POS;
|
||||||
|
|
|
@ -74,6 +74,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||||
|
|
||||||
|
// Implementation of DictionaryStructureWithBufferPolicy::iterateNgramEntries
// for this policy: reports ngram entries for prevWordsInfo to the listener.
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
|
||||||
|
|
Loading…
Reference in a new issue