am 6e587b7f: Merge changes Icf107950,I1637525e
* commit '6e587b7f178448e24518d169f2aa8bb1a74643b7': Move methods of MultiBigramMap to cpp file. Fix: ver4 bigram GC.main
commit
c9f43b1226
|
@ -30,4 +30,75 @@ const size_t MultiBigramMap::MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP = 25;
|
||||||
// Most common previous word contexts currently have 100 bigrams
|
// Most common previous word contexts currently have 100 bigrams
|
||||||
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
|
const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = 100;
|
||||||
|
|
||||||
|
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
||||||
|
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
||||||
|
int MultiBigramMap::getBigramProbability(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
|
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
|
||||||
|
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
|
||||||
|
mBigramMaps.find(wordPosition);
|
||||||
|
if (mapPosition != mBigramMaps.end()) {
|
||||||
|
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
|
||||||
|
unigramProbability);
|
||||||
|
}
|
||||||
|
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
|
||||||
|
addBigramsForWordPosition(structurePolicy, wordPosition);
|
||||||
|
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
|
||||||
|
nextWordPosition, unigramProbability);
|
||||||
|
}
|
||||||
|
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
|
||||||
|
nextWordPosition, unigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MultiBigramMap::BigramMap::init(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
|
||||||
|
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||||
|
bigramsListPos);
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
bigramsIt.next();
|
||||||
|
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
||||||
|
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int MultiBigramMap::BigramMap::getBigramProbability(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
|
const int nextWordPosition, const int unigramProbability) const {
|
||||||
|
int bigramProbability = NOT_A_PROBABILITY;
|
||||||
|
if (mBloomFilter.isInFilter(nextWordPosition)) {
|
||||||
|
const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
|
||||||
|
mBigramMap.find(nextWordPosition);
|
||||||
|
if (bigramProbabilityIt != mBigramMap.end()) {
|
||||||
|
bigramProbability = bigramProbabilityIt->second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MultiBigramMap::addBigramsForWordPosition(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
|
||||||
|
mBigramMaps[position].init(structurePolicy, position);
|
||||||
|
}
|
||||||
|
|
||||||
|
int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
||||||
|
const int nextWordPosition, const int unigramProbability) {
|
||||||
|
int bigramProbability = NOT_A_PROBABILITY;
|
||||||
|
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
||||||
|
bigramsListPos);
|
||||||
|
while (bigramsIt.hasNext()) {
|
||||||
|
bigramsIt.next();
|
||||||
|
if (bigramsIt.getBigramPos() == nextWordPosition) {
|
||||||
|
bigramProbability = bigramsIt.getProbability();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -38,21 +38,7 @@ class MultiBigramMap {
|
||||||
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
// Look up the bigram probability for the given word pair from the cached bigram maps.
|
||||||
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
// Also caches the bigrams if there is space remaining and they have not been cached already.
|
||||||
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int wordPosition, const int nextWordPosition, const int unigramProbability) {
|
const int wordPosition, const int nextWordPosition, const int unigramProbability);
|
||||||
hash_map_compat<int, BigramMap>::const_iterator mapPosition =
|
|
||||||
mBigramMaps.find(wordPosition);
|
|
||||||
if (mapPosition != mBigramMaps.end()) {
|
|
||||||
return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
|
|
||||||
unigramProbability);
|
|
||||||
}
|
|
||||||
if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
|
|
||||||
addBigramsForWordPosition(structurePolicy, wordPosition);
|
|
||||||
return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
|
|
||||||
nextWordPosition, unigramProbability);
|
|
||||||
}
|
|
||||||
return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
|
|
||||||
nextWordPosition, unigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
mBigramMaps.clear();
|
mBigramMaps.clear();
|
||||||
|
@ -67,33 +53,11 @@ class MultiBigramMap {
|
||||||
~BigramMap() {}
|
~BigramMap() {}
|
||||||
|
|
||||||
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nodePos) {
|
const int nodePos);
|
||||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
|
||||||
bigramsListPos);
|
|
||||||
while (bigramsIt.hasNext()) {
|
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
|
|
||||||
mBloomFilter.setInFilter(bigramsIt.getBigramPos());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int getBigramProbability(
|
int getBigramProbability(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy,
|
||||||
const int nextWordPosition, const int unigramProbability) const {
|
const int nextWordPosition, const int unigramProbability) const;
|
||||||
int bigramProbability = NOT_A_PROBABILITY;
|
|
||||||
if (mBloomFilter.isInFilter(nextWordPosition)) {
|
|
||||||
const hash_map_compat<int, int>::const_iterator bigramProbabilityIt =
|
|
||||||
mBigramMap.find(nextWordPosition);
|
|
||||||
if (bigramProbabilityIt != mBigramMap.end()) {
|
|
||||||
bigramProbability = bigramProbabilityIt->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
|
// NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
|
||||||
|
@ -103,27 +67,12 @@ class MultiBigramMap {
|
||||||
BloomFilter mBloomFilter;
|
BloomFilter mBloomFilter;
|
||||||
};
|
};
|
||||||
|
|
||||||
AK_FORCE_INLINE void addBigramsForWordPosition(
|
void addBigramsForWordPosition(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
|
||||||
mBigramMaps[position].init(structurePolicy, position);
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE int readBigramProbabilityFromBinaryDictionary(
|
int readBigramProbabilityFromBinaryDictionary(
|
||||||
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
|
||||||
const int nextWordPosition, const int unigramProbability) {
|
const int nextWordPosition, const int unigramProbability);
|
||||||
int bigramProbability = NOT_A_PROBABILITY;
|
|
||||||
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos);
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(),
|
|
||||||
bigramsListPos);
|
|
||||||
while (bigramsIt.hasNext()) {
|
|
||||||
bigramsIt.next();
|
|
||||||
if (bigramsIt.getBigramPos() == nextWordPosition) {
|
|
||||||
bigramProbability = bigramsIt.getProbability();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return structurePolicy->getProbability(unigramProbability, bigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
|
static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
|
||||||
hash_map_compat<int, BigramMap> mBigramMaps;
|
hash_map_compat<int, BigramMap> mBigramMaps;
|
||||||
|
|
|
@ -46,6 +46,7 @@ bool BigramDictContent::writeBigramEntryAndAdvancePosition(const int probability
|
||||||
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
|
const int bigramFlags = createAndGetBigramFlags(probability, hasNext);
|
||||||
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
|
||||||
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int targetTerminalIdToWrite =
|
const int targetTerminalIdToWrite =
|
||||||
|
@ -66,6 +67,7 @@ bool BigramDictContent::copyBigramList(const int bigramListPos, const int toPos)
|
||||||
&readingPos);
|
&readingPos);
|
||||||
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, targetTerminalId,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram entry to copy. pos: %d", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,6 +90,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
|
||||||
// Copy bigram list with GC from original content.
|
// Copy bigram list with GC from original content.
|
||||||
if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
|
if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
|
||||||
terminalIdMap, &bigramEntryCount)) {
|
terminalIdMap, &bigramEntryCount)) {
|
||||||
|
AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
|
||||||
|
originalBigramListPos, bigramListPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (bigramEntryCount == 0) {
|
if (bigramEntryCount == 0) {
|
||||||
|
@ -97,6 +101,8 @@ bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *
|
||||||
*outBigramEntryCount += bigramEntryCount;
|
*outBigramEntryCount += bigramEntryCount;
|
||||||
// Set bigram list position to the lookup table.
|
// Set bigram list position to the lookup table.
|
||||||
if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
|
if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
|
||||||
|
AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
|
||||||
|
it->second, bigramListPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -111,6 +117,7 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
bool hasNext = true;
|
bool hasNext = true;
|
||||||
int readingPos = bigramListPos;
|
int readingPos = bigramListPos;
|
||||||
int writingPos = toPos;
|
int writingPos = toPos;
|
||||||
|
int lastEntryPos = NOT_A_DICT_POS;
|
||||||
while (hasNext) {
|
while (hasNext) {
|
||||||
int probability = NOT_A_PROBABILITY;
|
int probability = NOT_A_PROBABILITY;
|
||||||
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
|
||||||
|
@ -125,12 +132,24 @@ bool BigramDictContent::runGCBigramList(const int bigramListPos,
|
||||||
// Target word has been removed.
|
// Target word has been removed.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
lastEntryPos = hasNext ? writingPos : NOT_A_DICT_POS;
|
||||||
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
|
if (!writeBigramEntryAndAdvancePosition(probability, hasNext, it->second,
|
||||||
&writingPos)) {
|
&writingPos)) {
|
||||||
|
AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*outEntrycount += 1;
|
*outEntrycount += 1;
|
||||||
}
|
}
|
||||||
|
if (lastEntryPos != NOT_A_DICT_POS) {
    // lastEntryPos is only left set when the last entry written by the loop was written
    // with hasNext == true, i.e. the entries that followed it in the original list were
    // dropped. Re-read that entry and rewrite it in place with hasNext cleared.
    // The write must target lastEntryPos: writingPos has already been advanced past the
    // entry by writeBigramEntryAndAdvancePosition(), so writing there would add another
    // entry after it and leave the stale hasNext flag untouched.
    int probability = NOT_A_PROBABILITY;
    int targetTerminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    getBigramEntry(&probability, 0 /* outHasNext */, &targetTerminalId, lastEntryPos);
    if (!writeBigramEntry(probability, false /* hasNext */, targetTerminalId, lastEntryPos)) {
        AKLOGE("Cannot write bigram entry to set hasNext flag after GC. pos: %d",
                lastEntryPos);
        return false;
    }
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue