From 11765ee804ee8af7e3d28e97ca41c746a07c97e3 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 2 Dec 2013 16:11:25 +0900 Subject: [PATCH 1/2] Fix: ver4 bigram GC. Bug: 11073222 Change-Id: I1637525ead60026cdf75ac90d40f97d02ce44ea1 --- .../v4/content/bigram_dict_content.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp index 4cd96722e..431f342ae 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp @@ -46,6 +46,7 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability const int bigramFlags = createAndGetBigramFlags(probability, hasNext); if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { + AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); return false; } const int targetTerminalIdToWrite = @@ -66,6 +67,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos) &readingPos); if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, &writingPos)) { + AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos); return false; } } @@ -88,6 +90,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap * // Copy bigram list with GC from original content. if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos, terminalIdMap, &bigramEntryCount)) { + AKLOGE("Cannot complete GC for the bigram list. 
original pos: %d, pos: %d", + originalBigramListPos, bigramListPos); return false; } if (bigramEntryCount == 0) { @@ -97,6 +101,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap * *outBigramEntryCount += bigramEntryCount; // Set bigram list position to the lookup table. if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) { + AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d", + it->second, bigramListPos); return false; } } @@ -111,6 +117,7 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, bool hasNext = true; int readingPos = bigramListPos; int writingPos = toPos; + int lastEntryPos = NOT_A_DICT_POS; while (hasNext) { int probability = NOT_A_PROBABILITY; int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; @@ -125,12 +132,24 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos, // Target word has been removed. continue; } + lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS; if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second, &writingPos)) { + AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); return false; } *outEntrycount += 1; } + if (lastEntryPos != NOT_A_DICT_POS) { + // Clear the has next flag of the last written entry by rewriting it in place. + int probability = NOT_A_PROBABILITY; + int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; + getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos); + if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, lastEntryPos)) { + AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", lastEntryPos); + return false; + } + } return true; } From b685ffa13c7fad4ebd47c682aa2bdc95bc3189b3 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 2 Dec 2013 15:14:10 +0900 Subject: [PATCH 2/2] Move methods of MultiBigramMap to cpp file. 
Change-Id: Icf10795037a7e966ac843cd168fe45955b6aef56 --- .../core/dictionary/multi_bigram_map.cpp | 71 +++++++++++++++++++ .../core/dictionary/multi_bigram_map.h | 67 +++-------------- 2 files changed, 79 insertions(+), 59 deletions(-) diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index b1d2f4b4d..49d82e69a 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25; // Most common previous word contexts currently have 100 bigrams const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100; +// Look up the bigram probability for the given word pair from the cached bigram maps. +// Also caches the bigrams if there is space remaining and they have not been cached already. +int MultiBigramMap::getBigramProbability( + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int wordPosition, const int nextWordPosition, const int unigramProbability) { + hash_map_compat::const_iterator mapPosition = + mBigramMaps.find(wordPosition); + if (mapPosition != mBigramMaps.end()) { + return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, + unigramProbability); + } + if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { + addBigramsForWordPosition(structurePolicy, wordPosition); + return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, + nextWordPosition, unigramProbability); + } + return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, + nextWordPosition, unigramProbability); +} + +void MultiBigramMap::BigramMap::init( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + 
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { + continue; + } + mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); + mBloomFilter.setInFilter(bigramsIt.getBigramPos()); + } +} + +int MultiBigramMap::BigramMap::getBigramProbability( + const DictionaryStructureWithBufferPolicy *const structurePolicy, + const int nextWordPosition, const int unigramProbability) const { + int bigramProbability = NOT_A_PROBABILITY; + if (mBloomFilter.isInFilter(nextWordPosition)) { + const hash_map_compat::const_iterator bigramProbabilityIt = + mBigramMap.find(nextWordPosition); + if (bigramProbabilityIt != mBigramMap.end()) { + bigramProbability = bigramProbabilityIt->second; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); +} + +void MultiBigramMap::addBigramsForWordPosition( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { + mBigramMaps[position].init(structurePolicy, position); +} + +int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, + const int nextWordPosition, const int unigramProbability) { + int bigramProbability = NOT_A_PROBABILITY; + const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); + BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), + bigramsListPos); + while (bigramsIt.hasNext()) { + bigramsIt.next(); + if (bigramsIt.getBigramPos() == nextWordPosition) { + bigramProbability = bigramsIt.getProbability(); + break; + } + } + return structurePolicy->getProbability(unigramProbability, bigramProbability); +} + } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h 
b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index 4633c07b0..421b2681c 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -38,21 +38,7 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int wordPosition, const int nextWordPosition, const int unigramProbability) { - hash_map_compat::const_iterator mapPosition = - mBigramMaps.find(wordPosition); - if (mapPosition != mBigramMaps.end()) { - return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, - unigramProbability); - } - if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { - addBigramsForWordPosition(structurePolicy, wordPosition); - return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, - nextWordPosition, unigramProbability); - } - return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, - nextWordPosition, unigramProbability); - } + const int wordPosition, const int nextWordPosition, const int unigramProbability); void clear() { mBigramMaps.clear(); @@ -67,33 +53,11 @@ class MultiBigramMap { ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nodePos) { - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { - continue; - } - mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); - mBloomFilter.setInFilter(bigramsIt.getBigramPos()); - } - } + const int nodePos); - AK_FORCE_INLINE int 
getBigramProbability( + int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int nextWordPosition, const int unigramProbability) const { - int bigramProbability = NOT_A_PROBABILITY; - if (mBloomFilter.isInFilter(nextWordPosition)) { - const hash_map_compat::const_iterator bigramProbabilityIt = - mBigramMap.find(nextWordPosition); - if (bigramProbabilityIt != mBigramMap.end()) { - bigramProbability = bigramProbabilityIt->second; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); - } + const int nextWordPosition, const int unigramProbability) const; private: // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default @@ -103,27 +67,12 @@ class MultiBigramMap { BloomFilter mBloomFilter; }; - AK_FORCE_INLINE void addBigramsForWordPosition( - const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { - mBigramMaps[position].init(structurePolicy, position); - } + void addBigramsForWordPosition( + const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); - AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( + int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, - const int nextWordPosition, const int unigramProbability) { - int bigramProbability = NOT_A_PROBABILITY; - const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); - BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), - bigramsListPos); - while (bigramsIt.hasNext()) { - bigramsIt.next(); - if (bigramsIt.getBigramPos() == nextWordPosition) { - bigramProbability = bigramsIt.getProbability(); - break; - } - } - return structurePolicy->getProbability(unigramProbability, bigramProbability); - } + const int nextWordPosition, const int unigramProbability); static const size_t 
MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; hash_map_compat mBigramMaps;