Use NgramListener in MultiBigramMap.

Bug: 14425059
Change-Id: I425536290111f2a8172f31370706f858a1e07f6e
This commit is contained in:
Keisuke Kuroyanagi 2014-08-01 11:00:03 +09:00
parent da5ccd9f18
commit 35c62b2cc9
4 changed files with 55 additions and 56 deletions

View file

@ -117,7 +117,7 @@ class DicNode {
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1];
}
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
@ -208,12 +208,9 @@ class DicNode {
return mDicNodeProperties.getPtNodePos();
}
// Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
// NOTE(review): this listing looks like a diff with its +/- markers stripped, so the
// 1-indexed accessor and the array-returning accessor below appear side by side — confirm
// against the actual file.
int getNthPrevWordTerminalPtNodePos(const int n) const {
// n outside 1..MAX_PREV_WORD_COUNT_FOR_N_GRAM yields NOT_A_DICT_POS.
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
return NOT_A_DICT_POS;
}
return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
// TODO: Use view class to return PtNodePos array.
// Forwards the array pointer held by mDicNodeProperties without any range check; callers
// index it themselves (element 0 is what the 1-indexed accessor returned for n == 1).
const int *getPrevWordsTerminalPtNodePos() const {
return mDicNodeProperties.getPrevWordsTerminalPtNodePos();
}
// Used in DicNodeUtils

View file

@ -85,17 +85,10 @@ namespace latinime {
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
const int unigramProbability = dicNode->getProbability();
const int ptNodePos = dicNode->getPtNodePos();
const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);
}
if (multiBigramMap) {
const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos();
return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability);
}
return dictionaryStructurePolicy->getProbability(unigramProbability,
NOT_A_PROBABILITY);

View file

@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP =
// Also caches the bigrams if there is space remaining and they have not been cached already.
// Resolves the probability of nextWordPosition given the previous word(s), using the maps
// cached in mBigramMaps when possible. prevWordsPtNodePos[0] is the key into mBigramMaps.
// NOTE(review): this listing looks like a diff with its +/- markers stripped, so lines using
// wordPosition and lines using prevWordsPtNodePos appear side by side — confirm against the
// actual file.
int MultiBigramMap::getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
const int *const prevWordsPtNodePos, const int nextWordPosition,
const int unigramProbability) {
// Null array or NOT_A_DICT_POS in slot 0: NOT_A_PROBABILITY is passed as the bigram
// probability, and no map is looked up or created.
if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}
// Check whether a map has already been cached for this previous word.
std::unordered_map<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
mBigramMaps.find(prevWordsPtNodePos[0]);
if (mapPosition != mBigramMaps.end()) {
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
unigramProbability);
}
// Not cached: build a new map only while the cache holds fewer than
// MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP entries, then answer from it.
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
addBigramsForWordPosition(structurePolicy, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos);
return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy,
nextWordPosition, unigramProbability);
}
// Cache is at its limit: compute the answer without adding anything to mBigramMaps.
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos,
nextWordPosition, unigramProbability);
}
// Populates this map from the dictionary entries of the given previous word(s).
// NOTE(review): this listing looks like a diff with its +/- markers stripped; the explicit
// iterator loop (getBigramsIteratorOfPtNode) and the iterateNgramEntries() call look like the
// before/after forms of the same body — confirm against the actual file.
void MultiBigramMap::BigramMap::init(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
BinaryDictionaryBigramsIterator bigramsIt =
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
// Entries whose target position is NOT_A_DICT_POS are not stored.
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
continue;
}
// Store the probability under the target position and mark it in the bloom filter.
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
}
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int *const prevWordsPtNodePos) {
// Registers this object as the listener; presumably each entry is then delivered through
// onVisitEntry() — verify against NgramListener / iterateNgramEntries().
structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */);
}
int MultiBigramMap::BigramMap::getBigramProbability(
@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability(
return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
// Listener callback that records a single n-gram entry in this map.
// An entry whose target is NOT_A_DICT_POS is ignored; any other entry stores its probability
// under the target position and marks that position in the bloom filter.
void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability,
        const int targetPtNodePos) {
    const bool hasTarget = (targetPtNodePos != NOT_A_DICT_POS);
    if (hasTarget) {
        mBigramMap[targetPtNodePos] = ngramProbability;
        mBloomFilter.setInFilter(targetPtNodePos);
    }
}
// Creates the BigramMap for a previous word in mBigramMaps (operator[] inserts the entry if
// it is absent) and initializes it from the dictionary.
// NOTE(review): this listing looks like a diff with its +/- markers stripped, so the
// position-based and prevWordsPtNodePos-based forms appear side by side — confirm against
// the actual file.
void MultiBigramMap::addBigramsForWordPosition(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
mBigramMaps[position].init(structurePolicy, position);
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int *const prevWordsPtNodePos) {
// A null array is a no-op; otherwise element 0 is the map key.
if (prevWordsPtNodePos) {
mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos);
}
}
// Computes the probability for nextWordPosition directly through structurePolicy, without
// reading or updating mBigramMaps.
// NOTE(review): this listing looks like a diff with its +/- markers stripped; the iterator
// scan and the getProbabilityOfPtNode() call look like the before/after forms of the same
// body — confirm against the actual file.
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY;
BinaryDictionaryBigramsIterator bigramsIt =
structurePolicy->getBigramsIteratorOfPtNode(nodePos);
// Scan the bigram entries and stop at the first one whose position equals nextWordPosition.
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) {
bigramProbability = bigramsIt.getProbability();
break;
}
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int *const prevWordsPtNodePos, const int nextWordPosition,
const int unigramProbability) {
const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos,
nextWordPosition);
// NOTE(review): a result other than NOT_A_PROBABILITY is returned as-is here, without going
// through structurePolicy->getProbability(); presumably getProbabilityOfPtNode() already
// yields a final probability — confirm.
if (bigramProbability != NOT_A_PROBABILITY) {
return bigramProbability;
}
return structurePolicy->getProbability(unigramProbability, bigramProbability);
// No n-gram probability found: NOT_A_PROBABILITY is passed as the bigram probability.
return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}
} // namespace latinime

View file

@ -23,6 +23,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime {
@ -38,7 +39,8 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability);
const int *const prevWordsPtNodePos, const int nextWordPosition,
const int unigramProbability);
void clear() {
mBigramMaps.clear();
@ -47,32 +49,35 @@ class MultiBigramMap {
private:
DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);
// Holds the n-gram entries cached for one previous word: mBigramMap maps a target PtNode
// position to a probability, and mBloomFilter is updated with every stored position.
// NOTE(review): this listing looks like a diff with its +/- markers stripped, so two class
// heads, two destructors and two init() parameter lists appear side by side — confirm
// against the actual file.
class BigramMap {
class BigramMap : public NgramListener {
public:
BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
~BigramMap() {}
// Copy constructor needed for std::unordered_map.
BigramMap(const BigramMap &bigramMap)
: mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
virtual ~BigramMap() {}
// Fills the map from the dictionary entries of the given previous word(s).
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nodePos);
const int *const prevWordsPtNodePos);
// Const lookup of the probability for nextWordPosition.
int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nextWordPosition, const int unigramProbability) const;
// Stores one entry; a target of NOT_A_DICT_POS is ignored.
virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);
private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
// copy constructor is needed for use in hash_map.
// NOTE(review): the note above mentions the default copy constructor and hash_map, while
// this listing also shows an explicit copy constructor and std::unordered_map — confirm
// which wording is current.
static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
std::unordered_map<int, int> mBigramMap;
BloomFilter mBloomFilter;
};
void addBigramsForWordPosition(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int *const prevWordsPtNodePos);
int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability);
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int *const prevWordsPtNodePos, const int nextWordPosition,
const int unigramProbability);
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
std::unordered_map<int, BigramMap> mBigramMaps;