Merge "Fill up a map of bigram addresses for lookup." into jb-dev
This commit is contained in:
commit
a28d8e4cf0
4 changed files with 28 additions and 3 deletions
|
@ -135,6 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
|
||||||
// If the word is not found or has no bigrams, this function returns 0.
|
// If the word is not found or has no bigrams, this function returns 0.
|
||||||
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
||||||
const int prevWordLength) {
|
const int prevWordLength) {
|
||||||
|
if (0 >= prevWordLength) return 0;
|
||||||
const uint8_t* const root = DICT;
|
const uint8_t* const root = DICT;
|
||||||
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
|
int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
|
||||||
|
|
||||||
|
@ -152,6 +153,22 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
|
||||||
|
const int prevWordLength, std::map<int, int> *map) {
|
||||||
|
const uint8_t* const root = DICT;
|
||||||
|
int pos = getBigramListPositionForWord(prevWord, prevWordLength);
|
||||||
|
if (0 == pos) return;
|
||||||
|
|
||||||
|
int bigramFlags;
|
||||||
|
do {
|
||||||
|
bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
|
||||||
|
const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
|
||||||
|
const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
|
||||||
|
&pos);
|
||||||
|
(*map)[bigramPos] = frequency;
|
||||||
|
} while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
|
||||||
|
}
|
||||||
|
|
||||||
bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
|
bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
|
||||||
// Checks whether this word starts with same character or neighboring characters of
|
// Checks whether this word starts with same character or neighboring characters of
|
||||||
// what user typed.
|
// what user typed.
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#ifndef LATINIME_BIGRAM_DICTIONARY_H
|
#ifndef LATINIME_BIGRAM_DICTIONARY_H
|
||||||
#define LATINIME_BIGRAM_DICTIONARY_H
|
#define LATINIME_BIGRAM_DICTIONARY_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -28,6 +29,8 @@ class BigramDictionary {
|
||||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||||
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
|
||||||
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
|
||||||
|
void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
|
||||||
|
std::map<int, int> *map);
|
||||||
~BigramDictionary();
|
~BigramDictionary();
|
||||||
private:
|
private:
|
||||||
bool addWordBigram(unsigned short *word, int length, int frequency);
|
bool addWordBigram(unsigned short *word, int length, int frequency);
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
|
static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
|
||||||
const unsigned int length) {
|
const unsigned int length) {
|
||||||
int i = 0;
|
unsigned int i = 0;
|
||||||
for (; i <= length && input[i] != 0; ++i)
|
for (; i <= length && input[i] != 0; ++i)
|
||||||
output[i] = input[i] & 0xFF;
|
output[i] = input[i] & 0xFF;
|
||||||
output[i] = 0;
|
output[i] = 0;
|
||||||
|
@ -31,10 +31,10 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig
|
||||||
|
|
||||||
// Narrows a zero-terminated 16-bit string to 8-bit characters (low byte of
// each unit), replaces the last copied character with c, and zero-terminates
// the result.
//
// input:  source characters; copying stops at the first 0 unit or once the
//         index exceeds length.
// output: destination buffer. The loop bound is inclusive, so up to
//         length + 1 characters are copied and the terminator can be written
//         at output[length + 1]; the buffer must be sized accordingly.
// length: highest input index that may be copied.
// c:      replacement for the last copied character; unused when nothing
//         was copied.
// Returns output.
static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
        unsigned char* output, const unsigned int length, unsigned char c) {
    // Unsigned index so the comparison against length is not signed/unsigned.
    unsigned int i = 0;
    for (; i <= length && input[i] != 0; ++i)
        output[i] = input[i] & 0xFF;
    // With an empty input nothing was copied; output[i-1] would then address
    // the byte in front of the buffer, so only replace when i > 0.
    if (i > 0) output[i-1] = c;
    output[i] = 0;
    return output;
}
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
#ifndef LATINIME_DICTIONARY_H
|
#ifndef LATINIME_DICTIONARY_H
|
||||||
#define LATINIME_DICTIONARY_H
|
#define LATINIME_DICTIONARY_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "bigram_dictionary.h"
|
#include "bigram_dictionary.h"
|
||||||
#include "char_utils.h"
|
#include "char_utils.h"
|
||||||
#include "correction.h"
|
#include "correction.h"
|
||||||
|
@ -39,6 +41,9 @@ class Dictionary {
|
||||||
// If none, it's zero.
|
// If none, it's zero.
|
||||||
const int bigramListPosition = !prevWordChars ? 0
|
const int bigramListPosition = !prevWordChars ? 0
|
||||||
: mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
|
: mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
|
||||||
|
std::map<int, int> bigramMap;
|
||||||
|
mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
|
||||||
|
&bigramMap);
|
||||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
||||||
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
|
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
|
||||||
useFullEditDistance, outWords, frequencies);
|
useFullEditDistance, outWords, frequencies);
|
||||||
|
|
Loading…
Reference in a new issue