Replace the bigram list position with the map and filter
Passing the position will not allow us a reasonable lookup time. Replace this with a map and bloom filter for very fast lookup. Bug: 6313806 Change-Id: I3a61c0001cbc987c1c3c7b8df635d4590a370144
This commit is contained in:
parent
f1634c872c
commit
8950ce6c44
5 changed files with 60 additions and 47 deletions
|
@ -158,6 +158,11 @@ static inline void setInFilter(uint8_t *filter, const int position) {
|
||||||
filter[bucket >> 3] |= (1 << (bucket & 0x7));
|
filter[bucket >> 3] |= (1 << (bucket & 0x7));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isInFilter(uint8_t *filter, const int position) {
|
||||||
|
const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
|
||||||
|
return filter[bucket >> 3] & (1 << (bucket & 0x7));
|
||||||
|
}
|
||||||
|
|
||||||
void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
|
void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int32_t *prevWord,
|
||||||
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) {
|
const int prevWordLength, std::map<int, int> *map, uint8_t *filter) {
|
||||||
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
|
memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
|
||||||
|
|
|
@ -66,7 +66,8 @@ class BinaryFormat {
|
||||||
const int length);
|
const int length);
|
||||||
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
|
static int getWordAtAddress(const uint8_t* const root, const int address, const int maxDepth,
|
||||||
uint16_t* outWord);
|
uint16_t* outWord);
|
||||||
static int getProbability(const int bigramListPosition, const int unigramFreq);
|
static int getProbability(const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
|
const int unigramFreq);
|
||||||
|
|
||||||
// Flags for special processing
|
// Flags for special processing
|
||||||
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
|
// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
|
||||||
|
@ -519,9 +520,11 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
|
||||||
}
|
}
|
||||||
|
|
||||||
// This should probably return a probability in log space.
|
// This should probably return a probability in log space.
|
||||||
inline int BinaryFormat::getProbability(const int bigramListPosition, const int unigramFreq) {
|
inline int BinaryFormat::getProbability(const std::map<int, int> *bigramMap,
|
||||||
// TODO: use the bigram list position to get the bigram probability. If the bigram
|
const uint8_t *bigramFilter, const int unigramFreq) {
|
||||||
// is not found, use the unigram frequency.
|
// TODO: use the bigram filter for fast rejection, then the bigram map for lookup
|
||||||
|
// to get the bigram probability. If the bigram is not found, use the unigram frequency.
|
||||||
|
// Don't forget that bigramMap and bigramFilter can be null.
|
||||||
// TODO: if the unigram frequency is used, compute the actual probability
|
// TODO: if the unigram frequency is used, compute the actual probability
|
||||||
return unigramFreq;
|
return unigramFreq;
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,17 +37,13 @@ class Dictionary {
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
|
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
|
||||||
int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength,
|
int *codes, int codesSize, const int32_t* prevWordChars, const int prevWordLength,
|
||||||
bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
|
bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
|
||||||
// bigramListPosition is, as an int, the offset of the bigram list in the file.
|
|
||||||
// If none, it's zero.
|
|
||||||
const int bigramListPosition = !prevWordChars ? 0
|
|
||||||
: mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
|
|
||||||
std::map<int, int> bigramMap;
|
std::map<int, int> bigramMap;
|
||||||
uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
|
uint8_t bigramFilter[BIGRAM_FILTER_BYTE_SIZE];
|
||||||
mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
|
mBigramDictionary->fillBigramAddressToFrequencyMapAndFilter(prevWordChars,
|
||||||
prevWordLength, &bigramMap, bigramFilter);
|
prevWordLength, &bigramMap, bigramFilter);
|
||||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
||||||
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
|
mCorrection, xcoordinates, ycoordinates, codes, codesSize, &bigramMap,
|
||||||
useFullEditDistance, outWords, frequencies);
|
bigramFilter, useFullEditDistance, outWords, frequencies);
|
||||||
}
|
}
|
||||||
|
|
||||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||||
|
|
|
@ -98,7 +98,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons
|
||||||
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
|
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
|
||||||
int *xCoordinatesBuffer, int *yCoordinatesBuffer,
|
int *xCoordinatesBuffer, int *yCoordinatesBuffer,
|
||||||
const int codesBufferSize, const int bigramListPosition,
|
const int codesBufferSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
const bool useFullEditDistance, const int *codesSrc,
|
const bool useFullEditDistance, const int *codesSrc,
|
||||||
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
|
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
|
||||||
WordsPriorityQueuePool *queuePool,
|
WordsPriorityQueuePool *queuePool,
|
||||||
|
@ -128,7 +128,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
||||||
replacementCodePoint;
|
replacementCodePoint;
|
||||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||||
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
||||||
bigramListPosition, useFullEditDistance, codesSrc + i + 1,
|
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i + 1,
|
||||||
codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
|
codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
|
||||||
queuePool, digraphs, digraphsSize);
|
queuePool, digraphs, digraphsSize);
|
||||||
|
|
||||||
|
@ -138,7 +138,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
||||||
memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
|
memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
|
||||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||||
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
||||||
bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i,
|
bigramMap, bigramFilter, useFullEditDistance, codesSrc + i, codesRemain - i,
|
||||||
currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
|
currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
|
||||||
digraphsSize);
|
digraphsSize);
|
||||||
return;
|
return;
|
||||||
|
@ -161,16 +161,18 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
||||||
}
|
}
|
||||||
|
|
||||||
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
|
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
|
||||||
startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction,
|
startIndex + codesRemain, bigramMap, bigramFilter, useFullEditDistance, correction,
|
||||||
queuePool);
|
queuePool);
|
||||||
}
|
}
|
||||||
|
|
||||||
// bigramListPosition is the offset in the file to the list of bigrams for the previous word.
|
// bigramMap contains the association <bigram address> -> <bigram frequency>
|
||||||
|
// bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter
|
||||||
|
// in bigram_dictionary.cpp
|
||||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||||
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
|
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize,
|
const int *ycoordinates, const int *codes, const int codesSize,
|
||||||
const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
int *frequencies) {
|
const bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
|
||||||
|
|
||||||
queuePool->clearAll();
|
queuePool->clearAll();
|
||||||
Correction* masterCorrection = correction;
|
Correction* masterCorrection = correction;
|
||||||
|
@ -180,7 +182,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||||
int xCoordinatesBuffer[codesSize];
|
int xCoordinatesBuffer[codesSize];
|
||||||
int yCoordinatesBuffer[codesSize];
|
int yCoordinatesBuffer[codesSize];
|
||||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||||
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
|
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
|
||||||
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||||
queuePool, GERMAN_UMLAUT_DIGRAPHS,
|
queuePool, GERMAN_UMLAUT_DIGRAPHS,
|
||||||
sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
|
sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
|
||||||
|
@ -189,13 +191,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||||
int xCoordinatesBuffer[codesSize];
|
int xCoordinatesBuffer[codesSize];
|
||||||
int yCoordinatesBuffer[codesSize];
|
int yCoordinatesBuffer[codesSize];
|
||||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||||
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
|
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter,
|
||||||
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||||
queuePool, FRENCH_LIGATURES_DIGRAPHS,
|
queuePool, FRENCH_LIGATURES_DIGRAPHS,
|
||||||
sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
|
sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
|
||||||
} else { // Normal processing
|
} else { // Normal processing
|
||||||
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
|
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
|
||||||
bigramListPosition, useFullEditDistance, masterCorrection, queuePool);
|
bigramMap, bigramFilter, useFullEditDistance, masterCorrection, queuePool);
|
||||||
}
|
}
|
||||||
|
|
||||||
PROF_START(20);
|
PROF_START(20);
|
||||||
|
@ -228,15 +230,15 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||||
|
|
||||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||||
const int inputLength, const int bigramListPosition, const bool useFullEditDistance,
|
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool) {
|
const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) {
|
||||||
|
|
||||||
PROF_OPEN;
|
PROF_OPEN;
|
||||||
PROF_START(0);
|
PROF_START(0);
|
||||||
PROF_END(0);
|
PROF_END(0);
|
||||||
|
|
||||||
PROF_START(1);
|
PROF_START(1);
|
||||||
getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition,
|
getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter,
|
||||||
useFullEditDistance, inputLength, correction, queuePool);
|
useFullEditDistance, inputLength, correction, queuePool);
|
||||||
PROF_END(1);
|
PROF_END(1);
|
||||||
|
|
||||||
|
@ -308,15 +310,16 @@ static const char SPACE = ' ';
|
||||||
|
|
||||||
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
|
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||||
const int bigramListPosition, const bool useFullEditDistance, const int inputLength,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
|
const bool useFullEditDistance, const int inputLength,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool) {
|
Correction *correction, WordsPriorityQueuePool *queuePool) {
|
||||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
|
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
|
||||||
getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction,
|
getSuggestionCandidates(useFullEditDistance, inputLength, bigramMap, bigramFilter, correction,
|
||||||
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
|
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||||
const int inputLength, const int bigramListPosition,
|
const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||||
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
|
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
|
||||||
// TODO: Remove setCorrectionParams
|
// TODO: Remove setCorrectionParams
|
||||||
|
@ -337,7 +340,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||||
int firstChildPos;
|
int firstChildPos;
|
||||||
|
|
||||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
|
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
|
||||||
bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos,
|
bigramMap, bigramFilter, correction, &childCount, &firstChildPos, &siblingPos,
|
||||||
queuePool, currentWordIndex);
|
queuePool, currentWordIndex);
|
||||||
// Update next sibling pos
|
// Update next sibling pos
|
||||||
correction->setTreeSiblingPos(outputIndex, siblingPos);
|
correction->setTreeSiblingPos(outputIndex, siblingPos);
|
||||||
|
@ -432,8 +435,8 @@ bool UnigramDictionary::getSubStringSuggestion(
|
||||||
queuePool->clearSubQueue(currentWordIndex);
|
queuePool->clearSubQueue(currentWordIndex);
|
||||||
// TODO: pass the bigram list for substring suggestion
|
// TODO: pass the bigram map and filter for substring suggestion
|
||||||
getSuggestionCandidates(useFullEditDistance, inputWordLength,
|
getSuggestionCandidates(useFullEditDistance, inputWordLength,
|
||||||
0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */,
|
0 /* bigramMap */, 0 /* bigramFilter */, correction, queuePool,
|
||||||
MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
|
false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
|
if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
|
||||||
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
|
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
|
||||||
|
@ -763,9 +766,9 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
|
||||||
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
|
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
|
||||||
// given level, as output into newCount when traversing this level's parent.
|
// given level, as output into newCount when traversing this level's parent.
|
||||||
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
const int bigramListPosition, Correction *correction, int *newCount,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, Correction *correction,
|
||||||
int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
int *newCount, int *newChildrenPosition, int *nextSiblingPosition,
|
||||||
const int currentWordIndex) {
|
WordsPriorityQueuePool *queuePool, const int currentWordIndex) {
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
correction->checkState();
|
correction->checkState();
|
||||||
}
|
}
|
||||||
|
@ -846,9 +849,9 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||||
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
|
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
|
||||||
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
|
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
|
||||||
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
|
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
|
||||||
// The bigramListPosition is the offset in the file of the bigrams for the previous word,
|
// bigramMap contains the bigram frequencies indexed by addresses for fast lookup.
|
||||||
// or zero if we don't know of any bigrams for it.
|
// bigramFilter is a bloom filter of said addresses for even faster rejection.
|
||||||
const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq);
|
const int probability = BinaryFormat::getProbability(bigramMap, bigramFilter, unigramFreq);
|
||||||
onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
|
onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
|
||||||
currentWordIndex);
|
currentWordIndex);
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
|
#ifndef LATINIME_UNIGRAM_DICTIONARY_H
|
||||||
#define LATINIME_UNIGRAM_DICTIONARY_H
|
#define LATINIME_UNIGRAM_DICTIONARY_H
|
||||||
|
|
||||||
|
#include <map>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "correction.h"
|
#include "correction.h"
|
||||||
#include "correction_state.h"
|
#include "correction_state.h"
|
||||||
|
@ -75,32 +76,36 @@ class UnigramDictionary {
|
||||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||||
Correction *correction, const int *xcoordinates, const int *ycoordinates,
|
Correction *correction, const int *xcoordinates, const int *ycoordinates,
|
||||||
const int *codes, const int codesSize, const int bigramListPosition,
|
const int *codes, const int codesSize, const std::map<int, int> *bigramMap,
|
||||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies);
|
const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords,
|
||||||
|
int *frequencies);
|
||||||
virtual ~UnigramDictionary();
|
virtual ~UnigramDictionary();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int inputLength,
|
const int *ycoordinates, const int *codes, const int inputLength,
|
||||||
const int bigramListPosition, const bool useFullEditDistance, Correction *correction,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
|
const bool useFullEditDistance, Correction *correction,
|
||||||
WordsPriorityQueuePool *queuePool);
|
WordsPriorityQueuePool *queuePool);
|
||||||
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
|
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
|
||||||
const digraph_t* const digraphs, const unsigned int digraphsSize) const;
|
const digraph_t* const digraphs, const unsigned int digraphsSize) const;
|
||||||
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||||
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
|
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
|
||||||
const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc,
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
|
const bool useFullEditDistance, const int* codesSrc, const int codesRemain,
|
||||||
|
const int currentDepth, int* codesDest, Correction *correction,
|
||||||
WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
|
WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
|
||||||
const unsigned int digraphsSize);
|
const unsigned int digraphsSize);
|
||||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
|
const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
|
||||||
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||||
const int *ycoordinates, const int *codes, const int bigramListPosition,
|
const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap,
|
||||||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputLength,
|
||||||
WordsPriorityQueuePool* queuePool);
|
Correction *correction, WordsPriorityQueuePool* queuePool);
|
||||||
void getSuggestionCandidates(
|
void getSuggestionCandidates(
|
||||||
const bool useFullEditDistance, const int inputLength, const int bigramListPosition,
|
const bool useFullEditDistance, const int inputLength,
|
||||||
|
const std::map<int, int> *bigramMap, const uint8_t *bigramFilter,
|
||||||
Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
|
Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
|
||||||
const int maxErrors, const int currentWordIndex);
|
const int maxErrors, const int currentWordIndex);
|
||||||
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
|
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
|
||||||
|
@ -114,9 +119,10 @@ class UnigramDictionary {
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
// Process a node by considering proximity, missing and excessive character
|
// Process a node by considering proximity, missing and excessive character
|
||||||
bool processCurrentNode(const int initialPos, const int bigramListPosition,
|
bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap,
|
||||||
Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition,
|
const uint8_t *bigramFilter, Correction *correction, int *newCount,
|
||||||
WordsPriorityQueuePool *queuePool, const int currentWordIndex);
|
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
||||||
|
const int currentWordIndex);
|
||||||
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
||||||
ProximityInfo *proximityInfo, unsigned short *word);
|
ProximityInfo *proximityInfo, unsigned short *word);
|
||||||
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
||||||
|
|
Loading…
Reference in a new issue