Fill up a map of bigram addresses for lookup.
We don't want to do a linear search on each terminal when there may be 100+ bigrams for a given word, because that would be disastrous for performance. Also, we need to resolve each bigram address anyway. This change resolves the addresses up front and puts them in a balanced tree so that lookup will be O(log(n)). Bug: 6313806 Change-Id: Ibf088035870b9acb41e948f0ab7af4726f2cee24 main
parent
a1c89d9dbf
commit
1ff8dc47be
|
@ -135,6 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
|||
// If the word is not found or has no bigrams, this function returns 0.
|
||||
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
||||
const int prevWordLength) {
|
||||
if (0 >= prevWordLength) return 0;
|
||||
const uint8_t* const root = DICT;
|
||||
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
|
||||
|
||||
|
@ -152,6 +153,22 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
|||
return pos;
|
||||
}
|
||||
|
||||
void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
|
||||
const int prevWordLength, std::map<int, int> *map) {
|
||||
const uint8_t* const root = DICT;
|
||||
int pos = getBigramListPositionForWord(prevWord, prevWordLength);
|
||||
if (0 == pos) return;
|
||||
|
||||
int bigramFlags;
|
||||
do {
|
||||
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||
const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||
&pos);
|
||||
(*map)[bigramPos] = frequency;
|
||||
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||
}
|
||||
|
||||
bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
|
||||
// Checks whether this word starts with same character or neighboring characters of
|
||||
// what user typed.
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#ifndef LATINIME_BIGRAM_DICTIONARY_H
|
||||
#define LATINIME_BIGRAM_DICTIONARY_H
|
||||
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace latinime {
|
||||
|
@ -28,6 +29,8 @@ class BigramDictionary {
|
|||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
||||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
||||
void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
|
||||
std::map<int, int> *map);
|
||||
~BigramDictionary();
|
||||
private:
|
||||
bool addWordBigram(unsigned short *word, int length, int frequency);
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
|
||||
static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
|
||||
const unsigned int length) {
|
||||
int i = 0;
|
||||
unsigned int i = 0;
|
||||
for (; i <= length && input[i] != 0; ++i)
|
||||
output[i] = input[i] & 0xFF;
|
||||
output[i] = 0;
|
||||
|
@ -31,10 +31,10 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig
|
|||
|
||||
// Copies the zero-terminated 16-bit string `input` into `output`, keeping only the low byte of
// each character, then overwrites the last copied character with `c` and zero-terminates.
//
// input:  source characters; copying stops at the first 0 element.
// output: destination buffer, returned to the caller.
// length: copy bound. NOTE(review): the bound is inclusive (i <= length), so up to length + 1
//         characters plus the terminator may be written; `output` therefore needs room for
//         length + 2 bytes - confirm that callers size their buffers accordingly.
// c:      replacement for the last copied character.
//
// When `input` is empty nothing is replaced and `output` becomes the empty string.
static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
        unsigned char* output, const unsigned int length, unsigned char c) {
    // Unsigned to match `length` and avoid a signed/unsigned comparison in the loop condition.
    unsigned int i = 0;
    for (; i <= length && input[i] != 0; ++i)
        output[i] = input[i] & 0xFF;
    // Guard against empty input: with i == 0 the index i - 1 would wrap around and write far
    // outside the buffer.
    if (i > 0) output[i - 1] = c;
    output[i] = 0;
    return output;
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#ifndef LATINIME_DICTIONARY_H
|
||||
#define LATINIME_DICTIONARY_H
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "bigram_dictionary.h"
|
||||
#include "char_utils.h"
|
||||
#include "correction.h"
|
||||
|
@ -39,6 +41,9 @@ class Dictionary {
|
|||
// If none, it's zero.
|
||||
const int bigramListPosition = !prevWordChars ? 0
|
||||
: mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
|
||||
std::map<int, int> bigramMap;
|
||||
mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
|
||||
&bigramMap);
|
||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
||||
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
|
||||
useFullEditDistance, outWords, frequencies);
|
||||
|
|
Loading…
Reference in New Issue