* commit '4a4ab6f69cabf63ac0c3978474218ebd2a42369a': Use NgramListener in MultiBigramMap.
This commit is contained in:
commit
76425ba8f5
4 changed files with 55 additions and 56 deletions
|
@ -117,7 +117,7 @@ class DicNode {
|
||||||
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
|
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
|
||||||
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
|
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
|
||||||
newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
|
newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1];
|
||||||
}
|
}
|
||||||
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
|
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
|
||||||
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
|
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
|
||||||
|
@ -208,12 +208,9 @@ class DicNode {
|
||||||
return mDicNodeProperties.getPtNodePos();
|
return mDicNodeProperties.getPtNodePos();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
|
// TODO: Use view class to return PtNodePos array.
|
||||||
int getNthPrevWordTerminalPtNodePos(const int n) const {
|
const int *getPrevWordsTerminalPtNodePos() const {
|
||||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
return mDicNodeProperties.getPrevWordsTerminalPtNodePos();
|
||||||
return NOT_A_DICT_POS;
|
|
||||||
}
|
|
||||||
return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used in DicNodeUtils
|
// Used in DicNodeUtils
|
||||||
|
|
|
@ -85,17 +85,10 @@ namespace latinime {
|
||||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||||
const int unigramProbability = dicNode->getProbability();
|
const int unigramProbability = dicNode->getProbability();
|
||||||
const int ptNodePos = dicNode->getPtNodePos();
|
|
||||||
const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
|
|
||||||
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
|
|
||||||
// Note: Normally wordPos comes from the dictionary and should never equal
|
|
||||||
// NOT_A_VALID_WORD_POS.
|
|
||||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
|
||||||
NOT_A_PROBABILITY);
|
|
||||||
}
|
|
||||||
if (multiBigramMap) {
|
if (multiBigramMap) {
|
||||||
|
const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos();
|
||||||
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
|
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
|
||||||
prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
|
prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability);
|
||||||
}
|
}
|
||||||
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
return dictionaryStructurePolicy->getProbability(unigramProbability,
|
||||||
NOT_A_PROBABILITY);
|
NOT_A_PROBABILITY);
|
||||||
|
|
|
@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP =
|
||||||
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
||||||
int MultiBigramMap::getBigramProbability(
|
int MultiBigramMap::getBigramProbability(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
|
const int *const prevWordsPtNodePos, const int nextWordPosition,
|
||||||
|
const int unigramProbability) {
|
||||||
|
if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||||
|
return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
|
||||||
|
}
|
||||||
std::unordered_map<int, BigramMap>::const_iterator mapPosition =
|
std::unordered_map<int, BigramMap>::const_iterator mapPosition =
|
||||||
mBigramMaps.find(wordPosition);
|
mBigramMaps.find(prevWordsPtNodePos[0]);
|
||||||
if (mapPosition != mBigramMaps.end()) {
|
if (mapPosition != mBigramMaps.end()) {
|
||||||
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
|
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
|
||||||
unigramProbability);
|
unigramProbability);
|
||||||
}
|
}
|
||||||
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
|
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
|
||||||
addBigramsForWordPosition(structurePolicy, wordPosition);
|
addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos);
|
||||||
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
|
return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy,
|
||||||
nextWordPosition, unigramProbability);
|
nextWordPosition, unigramProbability);
|
||||||
}
|
}
|
||||||
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
|
return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos,
|
||||||
nextWordPosition, unigramProbability);
|
nextWordPosition, unigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MultiBigramMap::BigramMap::init(
|
void MultiBigramMap::BigramMap::init(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
const int *const prevWordsPtNodePos) {
|
||||||
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
|
structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */);
|
||||||
while (bigramsIt.hasNext()) {
|
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
|
||||||
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int MultiBigramMap::BigramMap::getBigramProbability(
|
int MultiBigramMap::BigramMap::getBigramProbability(
|
||||||
|
@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability(
|
||||||
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability,
|
||||||
|
const int targetPtNodePos) {
|
||||||
|
if (targetPtNodePos == NOT_A_DICT_POS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mBigramMap[targetPtNodePos] = ngramProbability;
|
||||||
|
mBloomFilter.setInFilter(targetPtNodePos);
|
||||||
|
}
|
||||||
|
|
||||||
void MultiBigramMap::addBigramsForWordPosition(
|
void MultiBigramMap::addBigramsForWordPosition(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
mBigramMaps[position].init(structurePolicy, position);
|
const int *const prevWordsPtNodePos) {
|
||||||
|
if (prevWordsPtNodePos) {
|
||||||
|
mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
|
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nextWordPosition, const int unigramProbability) {
|
const int *const prevWordsPtNodePos, const int nextWordPosition,
|
||||||
int bigramProbability = NOT_A_PROBABILITY;
|
const int unigramProbability) {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos,
|
||||||
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
|
nextWordPosition);
|
||||||
while (bigramsIt.hasNext()) {
|
if (bigramProbability != NOT_A_PROBABILITY) {
|
||||||
bigramsIt.next();
|
return bigramProbability;
|
||||||
if (bigramsIt.getBigramPos() == nextWordPosition) {
|
|
||||||
bigramProbability = bigramsIt.getProbability();
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
|
||||||
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||||
#include "suggest/core/dictionary/bloom_filter.h"
|
#include "suggest/core/dictionary/bloom_filter.h"
|
||||||
|
#include "suggest/core/dictionary/ngram_listener.h"
|
||||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -38,7 +39,8 @@ class MultiBigramMap {
|
||||||
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
||||||
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
||||||
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int wordPosition, const int nextWordPosition, const int unigramProbability);
|
const int *const prevWordsPtNodePos, const int nextWordPosition,
|
||||||
|
const int unigramProbability);
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
mBigramMaps.clear();
|
mBigramMaps.clear();
|
||||||
|
@ -47,32 +49,35 @@ class MultiBigramMap {
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
|
DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
|
||||||
|
|
||||||
class BigramMap {
|
class BigramMap : public NgramListener {
|
||||||
public:
|
public:
|
||||||
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
|
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
|
||||||
~BigramMap() {}
|
// Copy constructor needed for std::unordered_map.
|
||||||
|
BigramMap(const BigramMap &bigramMap)
|
||||||
|
: mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
|
||||||
|
virtual ~BigramMap() {}
|
||||||
|
|
||||||
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nodePos);
|
const int *const prevWordsPtNodePos);
|
||||||
|
|
||||||
int getBigramProbability(
|
int getBigramProbability(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nextWordPosition, const int unigramProbability) const;
|
const int nextWordPosition, const int unigramProbability) const;
|
||||||
|
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
|
|
||||||
// copy constructor is needed for use in hash_map.
|
|
||||||
static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
|
static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
|
||||||
std::unordered_map<int, int> mBigramMap;
|
std::unordered_map<int, int> mBigramMap;
|
||||||
BloomFilter mBloomFilter;
|
BloomFilter mBloomFilter;
|
||||||
};
|
};
|
||||||
|
|
||||||
void addBigramsForWordPosition(
|
void addBigramsForWordPosition(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
|
const int *const prevWordsPtNodePos);
|
||||||
|
|
||||||
int readBigramProbabilityFromBinaryDictionary(
|
int readBigramProbabilityFromBinaryDictionary(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nextWordPosition, const int unigramProbability);
|
const int *const prevWordsPtNodePos, const int nextWordPosition,
|
||||||
|
const int unigramProbability);
|
||||||
|
|
||||||
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
|
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
|
||||||
std::unordered_map<int, BigramMap> mBigramMaps;
|
std::unordered_map<int, BigramMap> mBigramMaps;
|
||||||
|
|
Loading…
Reference in a new issue