Merge "Use MultiBigramMap in structure policy."

main
Keisuke Kuroyanagi 2014-09-10 09:52:38 +00:00 committed by Android (Google) Code Review
commit 53da06b805
9 changed files with 82 additions and 20 deletions

View File

@ -18,7 +18,6 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime { namespace latinime {
@ -73,25 +72,12 @@ namespace latinime {
if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
} }
const int probability = getBigramNodeProbability(dictionaryStructurePolicy, dicNode, const int probability = dictionaryStructurePolicy->getProbabilityOfWordInContext(
multiBigramMap); dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
// TODO: This equation to calculate the improbability looks unreasonable. Investigate this. // TODO: This equation to calculate the improbability looks unreasonable. Investigate this.
const float cost = static_cast<float>(MAX_PROBABILITY - probability) const float cost = static_cast<float>(MAX_PROBABILITY - probability)
/ static_cast<float>(MAX_PROBABILITY); / static_cast<float>(MAX_PROBABILITY);
return cost; return cost;
} }
// Computes the probability for dicNode's word.
// With a MultiBigramMap, the lookup is delegated to it together with the node's previous word
// ids, its word id and its unigram probability. Without one, the structure policy derives the
// value from the unigram probability alone (NOT_A_PROBABILITY is passed as the bigram part).
/* static */ int DicNodeUtils::getBigramNodeProbability(
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
    const int nodeUnigramProbability = dicNode->getUnigramProbability();
    if (!multiBigramMap) {
        // No MultiBigramMap supplied: fall back to the unigram-only probability.
        return dictionaryStructurePolicy->getProbability(nodeUnigramProbability,
                NOT_A_PROBABILITY);
    }
    const int *const precedingWordIds = dicNode->getPrevWordIds();
    return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, precedingWordIds,
            dicNode->getWordId(), nodeUnigramProbability);
}
} // namespace latinime } // namespace latinime

View File

@ -46,10 +46,6 @@ class DicNodeUtils {
DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(DicNodeUtils);
// Max number of bigrams to look up // Max number of bigrams to look up
static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500; static const int MAX_BIGRAMS_CONSIDERED_PER_CONTEXT = 500;
static int getBigramNodeProbability(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap);
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DIC_NODE_UTILS_H #endif // LATINIME_DIC_NODE_UTILS_H

View File

@ -29,6 +29,7 @@ namespace latinime {
class DicNode; class DicNode;
class DicNodeVector; class DicNodeVector;
class DictionaryHeaderStructurePolicy; class DictionaryHeaderStructurePolicy;
class MultiBigramMap;
class NgramListener; class NgramListener;
class PrevWordsInfo; class PrevWordsInfo;
class UnigramProperty; class UnigramProperty;
@ -56,6 +57,10 @@ class DictionaryStructureWithBufferPolicy {
virtual int getWordId(const CodePointArrayView wordCodePoints, virtual int getWordId(const CodePointArrayView wordCodePoints,
const bool forceLowerCaseSearch) const = 0; const bool forceLowerCaseSearch) const = 0;
// Returns the probability of the word identified by wordId, given the previous word ids in
// prevWordIds. multiBigramMap is optional: the implementations added alongside this
// declaration test it for null and, when it is present, delegate the lookup to it.
// Those implementations return NOT_A_PROBABILITY when wordId is NOT_A_WORD_ID.
virtual int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const = 0;
// TODO: Remove
virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;
virtual int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const = 0; virtual int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const = 0;

View File

@ -28,6 +28,7 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
@ -117,6 +118,26 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return getWordIdFromTerminalPtNodePos(ptNodePos); return getWordIdFromTerminalPtNodePos(ptNodePos);
} }
int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
const int wordId, MultiBigramMap *const multiBigramMap) const {
if (wordId == NOT_A_WORD_ID) {
return NOT_A_PROBABILITY;
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
if (multiBigramMap) {
return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
wordId, ptNodeParams.getProbability());
}
if (prevWordIds) {
const int probability = getProbabilityOfWord(prevWordIds, wordId);
if (probability != NOT_A_PROBABILITY) {
return probability;
}
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const { const int bigramProbability) const {
if (mHeaderPolicy->isDecayingDict()) { if (mHeaderPolicy->isDecayingDict()) {

View File

@ -91,6 +91,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
// Implements DictionaryStructureWithBufferPolicy::getProbabilityOfWordInContext().
// multiBigramMap may be null; the definition checks it before use.
int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbability(const int unigramProbability, const int bigramProbability) const;
int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const;

View File

@ -21,6 +21,7 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/session/prev_words_info.h" #include "suggest/core/session/prev_words_info.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
@ -281,6 +282,27 @@ int PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return getWordIdFromTerminalPtNodePos(ptNodePos); return getWordIdFromTerminalPtNodePos(ptNodePos);
} }
int PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
const int wordId, MultiBigramMap *const multiBigramMap) const {
if (wordId == NOT_A_WORD_ID) {
return NOT_A_PROBABILITY;
}
const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
const PtNodeParams ptNodeParams =
mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
if (multiBigramMap) {
return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
wordId, ptNodeParams.getProbability());
}
if (prevWordIds) {
const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId);
if (bigramProbability != NOT_A_PROBABILITY) {
return bigramProbability;
}
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int PatriciaTriePolicy::getProbability(const int unigramProbability, int PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const { const int bigramProbability) const {
// Due to space constraints, the probability for bigrams is approximate - the lower the unigram // Due to space constraints, the probability for bigrams is approximate - the lower the unigram

View File

@ -66,6 +66,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
// Implements DictionaryStructureWithBufferPolicy::getProbabilityOfWordInContext().
// multiBigramMap may be null; the definition checks it before use.
int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbability(const int unigramProbability, const int bigramProbability) const;
int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const;

View File

@ -20,6 +20,7 @@
#include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dicnode/dic_node_vector.h"
#include "suggest/core/dictionary/multi_bigram_map.h"
#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/dictionary/property/bigram_property.h" #include "suggest/core/dictionary/property/bigram_property.h"
#include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/core/dictionary/property/unigram_property.h"
@ -112,6 +113,28 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
return ptNodeParams.getTerminalId(); return ptNodeParams.getTerminalId();
} }
int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
const int wordId, MultiBigramMap *const multiBigramMap) const {
// TODO: Quit using MultiBigramMap.
if (wordId == NOT_A_WORD_ID) {
return NOT_A_PROBABILITY;
}
const int ptNodePos =
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
if (multiBigramMap) {
return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
wordId, ptNodeParams.getProbability());
}
if (prevWordIds) {
const int probability = getProbabilityOfWord(prevWordIds, wordId);
if (probability != NOT_A_PROBABILITY) {
return probability;
}
}
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}
int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const { const int bigramProbability) const {
if (mHeaderPolicy->isDecayingDict()) { if (mHeaderPolicy->isDecayingDict()) {

View File

@ -68,6 +68,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
// Implements DictionaryStructureWithBufferPolicy::getProbabilityOfWordInContext().
// multiBigramMap may be null; the definition checks it before use.
int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
MultiBigramMap *const multiBigramMap) const;
int getProbability(const int unigramProbability, const int bigramProbability) const; int getProbability(const int unigramProbability, const int bigramProbability) const;
int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const;