Merge "Fill up a map of bigram addresses for lookup." into jb-dev

This commit is contained in:
Jean Chalard 2012-05-02 01:59:04 -07:00 committed by Android (Google) Code Review
commit a28d8e4cf0
4 changed files with 28 additions and 3 deletions

View file

@ -135,6 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
// If the word is not found or has no bigrams, this function returns 0. // If the word is not found or has no bigrams, this function returns 0.
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
const int prevWordLength) { const int prevWordLength) {
if (0 >= prevWordLength) return 0;
const uint8_t* const root = DICT; const uint8_t* const root = DICT;
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength); int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
@ -152,6 +153,22 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
return pos; return pos;
} }
// Walks the bigram attribute list attached to prevWord and records, for every
// entry in that list, the address decoded from the entry as the key and the
// frequency bits of the entry's flags as the value.
//
// prevWord / prevWordLength: the word whose bigram list is looked up.
// map: output; entries are written with operator[], so a key that is already
//      present has its value overwritten. Left untouched when the bigram list
//      position for prevWord is 0.
void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
        const int prevWordLength, std::map<int, int> *map) {
    const uint8_t* const dictRoot = DICT;
    int listPos = getBigramListPositionForWord(prevWord, prevWordLength);
    // A position of 0 is the "nothing to read" result of the lookup.
    if (0 == listPos) return;

    // The list always holds at least one entry once we get here; each entry's
    // flags say whether another entry follows.
    bool hasNext = true;
    while (hasNext) {
        // Both reads advance listPos past the bytes they consume.
        const int flags = BinaryFormat::getFlagsAndForwardPointer(dictRoot, &listPos);
        const int freq = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & flags;
        const int address = BinaryFormat::getAttributeAddressAndForwardPointer(dictRoot, flags,
                &listPos);
        (*map)[address] = freq;
        hasNext = (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & flags));
    }
}
bool BigramDictionary::checkFirstCharacter(unsigned short *word) { bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
// Checks whether this word starts with same character or neighboring characters of // Checks whether this word starts with same character or neighboring characters of
// what user typed. // what user typed.

View file

@ -17,6 +17,7 @@
#ifndef LATINIME_BIGRAM_DICTIONARY_H #ifndef LATINIME_BIGRAM_DICTIONARY_H
#define LATINIME_BIGRAM_DICTIONARY_H #define LATINIME_BIGRAM_DICTIONARY_H
#include <map>
#include <stdint.h> #include <stdint.h>
namespace latinime { namespace latinime {
@ -28,6 +29,8 @@ class BigramDictionary {
int getBigrams(const int32_t *word, int length, int *codes, int codesSize, int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams); unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength); int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
std::map<int, int> *map);
~BigramDictionary(); ~BigramDictionary();
private: private:
bool addWordBigram(unsigned short *word, int length, int frequency); bool addWordBigram(unsigned short *word, int length, int frequency);

View file

@ -22,7 +22,7 @@
static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output, static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
const unsigned int length) { const unsigned int length) {
int i = 0; unsigned int i = 0;
for (; i <= length && input[i] != 0; ++i) for (; i <= length && input[i] != 0; ++i)
output[i] = input[i] & 0xFF; output[i] = input[i] & 0xFF;
output[i] = 0; output[i] = 0;
@ -31,10 +31,10 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig
static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input, static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
unsigned char* output, const unsigned int length, unsigned char c) { unsigned char* output, const unsigned int length, unsigned char c) {
int i = 0; unsigned int i = 0;
for (; i <= length && input[i] != 0; ++i) for (; i <= length && input[i] != 0; ++i)
output[i] = input[i] & 0xFF; output[i] = input[i] & 0xFF;
output[i-1] = c; if (i > 0) output[i-1] = c;
output[i] = 0; output[i] = 0;
return output; return output;
} }

View file

@ -17,6 +17,8 @@
#ifndef LATINIME_DICTIONARY_H #ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H #define LATINIME_DICTIONARY_H
#include <map>
#include "bigram_dictionary.h" #include "bigram_dictionary.h"
#include "char_utils.h" #include "char_utils.h"
#include "correction.h" #include "correction.h"
@ -39,6 +41,9 @@ class Dictionary {
// If none, it's zero. // If none, it's zero.
const int bigramListPosition = !prevWordChars ? 0 const int bigramListPosition = !prevWordChars ? 0
: mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength); : mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
std::map<int, int> bigramMap;
mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
&bigramMap);
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition, mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
useFullEditDistance, outWords, frequencies); useFullEditDistance, outWords, frequencies);