From 35c62b2cc99761e97f57060ad5e3cdfad926aea7 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 1 Aug 2014 11:00:03 +0900 Subject: [PATCH] Use NgramListener in MultiBigramMap. Bug: 14425059 Change-Id: I425536290111f2a8172f31370706f858a1e07f6e --- .../jni/src/suggest/core/dicnode/dic_node.h | 11 ++-- .../suggest/core/dicnode/dic_node_utils.cpp | 11 +--- .../core/dictionary/multi_bigram_map.cpp | 64 ++++++++++--------- .../core/dictionary/multi_bigram_map.h | 25 +++++--- 4 files changed, 55 insertions(+), 56 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 92f39ea25..d1b2c87be 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -117,7 +117,7 @@ class DicNode { int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos(); for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) { - newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i); + newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1]; } mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, @@ -208,12 +208,9 @@ class DicNode { return mDicNodeProperties.getPtNodePos(); } - // Used to get n-gram probability in DicNodeUtils. n is 1-indexed. - int getNthPrevWordTerminalPtNodePos(const int n) const { - if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { - return NOT_A_DICT_POS; - } - return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1]; + // TODO: Use view class to return PtNodePos array. 
+ const int *getPrevWordsTerminalPtNodePos() const { + return mDicNodeProperties.getPrevWordsTerminalPtNodePos(); } // Used in DicNodeUtils diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 4445f4aaf..69ea67418 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -85,17 +85,10 @@ namespace latinime { const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { const int unigramProbability = dicNode->getProbability(); - const int ptNodePos = dicNode->getPtNodePos(); - const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */); - if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { - // Note: Normally wordPos comes from the dictionary and should never equal - // NOT_A_VALID_WORD_POS. - return dictionaryStructurePolicy->getProbability(unigramProbability, - NOT_A_PROBABILITY); - } if (multiBigramMap) { + const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos(); return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, - prevWordTerminalPtNodePos, ptNodePos, unigramProbability); + prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index 012e4dc9c..91f33a8dd 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = // Also caches the bigrams if there is space remaining and they have not been cached already. 
int MultiBigramMap::getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability) { + const int *const prevWordsPtNodePos, const int nextWordPosition, + const int unigramProbability) { + if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) { + return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); + } std::unordered_map<int, BigramMap>::const_iterator mapPosition = - mBigramMaps.find(wordPosition); + mBigramMaps.find(prevWordsPtNodePos[0]); if (mapPosition != mBigramMaps.end()) { return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(structurePolicy, wordPosition); - return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, + addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos); + return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } - return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos, nextWordPosition, unigramProbability); } void MultiBigramMap::BigramMap::init( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { - BinaryDictionaryBigramsIterator bigramsIt = - structurePolicy->getBigramsIteratorOfPtNode(nodePos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); - mBloomFilter.setInFilter(bigramsIt.getBigramPos()); - } + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int *const prevWordsPtNodePos) { + structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */); } int 
MultiBigramMap::BigramMap::getBigramProbability( @@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability( return structurePolicy->getProbability(unigramProbability, bigramProbability); } +void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability, + const int targetPtNodePos) { + if (targetPtNodePos == NOT_A_DICT_POS) { + return; + } + mBigramMap[targetPtNodePos] = ngramProbability; + mBloomFilter.setInFilter(targetPtNodePos); +} + void MultiBigramMap::addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { - mBigramMaps[position].init(structurePolicy, position); + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int *const prevWordsPtNodePos) { + if (prevWordsPtNodePos) { + mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos); + } } int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability) { - int bigramProbability = NOT_A_PROBABILITY; - BinaryDictionaryBigramsIterator bigramsIt = - structurePolicy->getBigramsIteratorOfPtNode(nodePos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPosition) { - bigramProbability = bigramsIt.getProbability(); - break; - } + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int *const prevWordsPtNodePos, const int nextWordPosition, + const int unigramProbability) { + const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos, + nextWordPosition); + if (bigramProbability != NOT_A_PROBABILITY) { + return bigramProbability; } - return structurePolicy->getProbability(unigramProbability, bigramProbability); + return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } } // namespace latinime diff --git 
a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 195b5e22f..ad36dde83 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -23,6 +23,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/bloom_filter.h" +#include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { @@ -38,7 +39,8 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability); + const int *const prevWordsPtNodePos, const int nextWordPosition, + const int unigramProbability); void clear() { mBigramMaps.clear(); @@ -47,32 +49,35 @@ class MultiBigramMap { private: DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); - class BigramMap { + class BigramMap : public NgramListener { public: BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} - ~BigramMap() {} + // Copy constructor needed for std::unordered_map. 
+ BigramMap(const BigramMap &bigramMap) + : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {} + virtual ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nodePos); - + const int *const prevWordsPtNodePos); int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nextWordPosition, const int unigramProbability) const; + virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); private: - // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default - // copy constructor is needed for use in hash_map. static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; std::unordered_map<int, int> mBigramMap; BloomFilter mBloomFilter; }; void addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int *const prevWordsPtNodePos); int readBigramProbabilityFromBinaryDictionary( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability); + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int *const prevWordsPtNodePos, const int nextWordPosition, + const int unigramProbability); static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; std::unordered_map<int, BigramMap> mBigramMaps;