am 6e587b7f: Merge changes Icf107950,I1637525e

* commit '6e587b7f178448e24518d169f2aa8bb1a74643b7':
  Move methods of MultiBigramMap to cpp file.
  Fix: ver4 bigram GC.
main
Keisuke Kuroyanagi 2013-12-01 23:42:22 -08:00 committed by Android Git Automerger
commit c9f43b1226
3 changed files with 98 additions and 59 deletions

View File

@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
// Most common previous word contexts currently have 100 bigrams // Most common previous word contexts currently have 100 bigrams
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100; const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
// Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already.
int MultiBigramMap::getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
if (mapPosition != mBigramMaps.end()) {
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
addBigramsForWordPosition(structurePolicy, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
nextWordPosition, unigramProbability);
}
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
nextWordPosition, unigramProbability);
}
// Fills this map from the bigram list attached to the PtNode at nodePos.
// For every entry in the list whose target position is valid, the entry's probability is stored
// in mBigramMap under that position and the position is registered in mBloomFilter. Entries whose
// target position is NOT_A_DICT_POS are skipped.
void MultiBigramMap::BigramMap::init(
        const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
    const int listPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
    BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
            listPos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        // NOTE(review): assumes getBigramPos() is a side-effect-free accessor, so reading it once
        // per entry is equivalent to reading it repeatedly.
        const int targetPos = bigramsIt.getBigramPos();
        if (targetPos != NOT_A_DICT_POS) {
            mBigramMap[targetPos] = bigramsIt.getProbability();
            mBloomFilter.setInFilter(targetPos);
        }
    }
}
int MultiBigramMap::BigramMap::getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nextWordPosition, const int unigramProbability) const {
int bigramProbability = NOT_A_PROBABILITY;
if (mBloomFilter.isInFilter(nextWordPosition)) {
const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
mBigramMap.find(nextWordPosition);
if (bigramProbabilityIt != mBigramMap.end()) {
bigramProbability = bigramProbabilityIt->second;
}
}
return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
void MultiBigramMap::addBigramsForWordPosition(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
mBigramMaps[position].init(structurePolicy, position);
}
// Uncached lookup: walks the bigram list of the PtNode at nodePos until an entry targeting
// nextWordPosition is found, and returns structurePolicy->getProbability() for
// unigramProbability and that entry's probability. When no entry matches, NOT_A_PROBABILITY is
// passed as the bigram probability.
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
        const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
        const int nextWordPosition, const int unigramProbability) {
    const int listPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
    BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
            listPos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() != nextWordPosition) {
            continue;
        }
        // First matching entry wins; the rest of the list is not examined.
        return structurePolicy->getProbability(unigramProbability, bigramsIt.getProbability());
    }
    return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}
} // namespace latinime } // namespace latinime

View File

@ -38,21 +38,7 @@ class MultiBigramMap {
// Look up the bigram probability for the given word pair from the cached bigram maps. // Look up the bigram probability for the given word pair from the cached bigram maps.
// Also caches the bigrams if there is space remaining and they have not been cached already. // Also caches the bigrams if there is space remaining and they have not been cached already.
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int wordPosition, const int nextWordPosition, const int unigramProbability) { const int wordPosition, const int nextWordPosition, const int unigramProbability);
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
mBigramMaps.find(wordPosition);
if (mapPosition != mBigramMaps.end()) {
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
unigramProbability);
}
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
addBigramsForWordPosition(structurePolicy, wordPosition);
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
nextWordPosition, unigramProbability);
}
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
nextWordPosition, unigramProbability);
}
void clear() { void clear() {
mBigramMaps.clear(); mBigramMaps.clear();
@ -67,33 +53,11 @@ class MultiBigramMap {
~BigramMap() {} ~BigramMap() {}
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nodePos) { const int nodePos);
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
continue;
}
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
}
}
AK_FORCE_INLINE int getBigramProbability( int getBigramProbability(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const DictionaryStructureWithBufferPolicy *const structurePolicy,
const int nextWordPosition, const int unigramProbability) const { const int nextWordPosition, const int unigramProbability) const;
int bigramProbability = NOT_A_PROBABILITY;
if (mBloomFilter.isInFilter(nextWordPosition)) {
const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
mBigramMap.find(nextWordPosition);
if (bigramProbabilityIt != mBigramMap.end()) {
bigramProbability = bigramProbabilityIt->second;
}
}
return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
private: private:
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
@ -103,27 +67,12 @@ class MultiBigramMap {
BloomFilter mBloomFilter; BloomFilter mBloomFilter;
}; };
AK_FORCE_INLINE void addBigramsForWordPosition( void addBigramsForWordPosition(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
mBigramMaps[position].init(structurePolicy, position);
}
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary( int readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) { const int nextWordPosition, const int unigramProbability);
int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
bigramsListPos);
while (bigramsIt.hasNext()) {
bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) {
bigramProbability = bigramsIt.getProbability();
break;
}
}
return structurePolicy->getProbability(unigramProbability, bigramProbability);
}
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
hash_map_compat<int, BigramMap> mBigramMaps; hash_map_compat<int, BigramMap> mBigramMaps;

View File

@ -46,6 +46,7 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
const int bigramFlags = createAndGetBigramFlags(probability, hasNext); const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
return false; return false;
} }
const int targetTerminalIdToWrite = const int targetTerminalIdToWrite =
@ -66,6 +67,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos)
&readingPos); &readingPos);
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId, if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
&writingPos)) { &writingPos)) {
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
return false; return false;
} }
} }
@ -88,6 +90,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
// Copy bigram list with GC from original content. // Copy bigram list with GC from original content.
if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos, if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
terminalIdMap, &bigramEntryCount)) { terminalIdMap, &bigramEntryCount)) {
AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
originalBigramListPos, bigramListPos);
return false; return false;
} }
if (bigramEntryCount == 0) { if (bigramEntryCount == 0) {
@ -97,6 +101,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
*outBigramEntryCount += bigramEntryCount; *outBigramEntryCount += bigramEntryCount;
// Set bigram list position to the lookup table. // Set bigram list position to the lookup table.
if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) { if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
it->second, bigramListPos);
return false; return false;
} }
} }
@ -111,6 +117,7 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
bool hasNext = true; bool hasNext = true;
int readingPos = bigramListPos; int readingPos = bigramListPos;
int writingPos = toPos; int writingPos = toPos;
int lastEntryPos = NOT_A_DICT_POS;
while (hasNext) { while (hasNext) {
int probability = NOT_A_PROBABILITY; int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
@ -125,12 +132,24 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
// Target word has been removed. // Target word has been removed.
continue; continue;
} }
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second, if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
&writingPos)) { &writingPos)) {
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
return false; return false;
} }
*outEntrycount += 1; *outEntrycount += 1;
} }
if (lastEntryPos != NOT_A_DICT_POS) {
// Update has next flag in the last written entry.
int probability = NOT_A_PROBABILITY;
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, writingPos)) {
AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d", writingPos);
return false;
}
}
return true; return true;
} }