Merge "Pass the bigram list position from the top level"
This commit is contained in:
commit
8b01650b12
3 changed files with 55 additions and 44 deletions
|
@ -35,9 +35,13 @@ class Dictionary {
|
|||
int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates,
|
||||
int *codes, int codesSize, bool useFullEditDistance, unsigned short *outWords,
|
||||
int *frequencies) {
|
||||
// bigramListPosition is, as an int, the offset of the bigram list in the file.
|
||||
// If none, it's zero.
|
||||
// TODO: get this from the bigram dictionary instance
|
||||
const int bigramListPosition = 0;
|
||||
return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
|
||||
mCorrection, xcoordinates, ycoordinates, codes,
|
||||
codesSize, useFullEditDistance, outWords, frequencies);
|
||||
mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition,
|
||||
useFullEditDistance, outWords, frequencies);
|
||||
}
|
||||
|
||||
int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
|
||||
|
|
|
@ -98,7 +98,8 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons
|
|||
void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codesBuffer,
|
||||
int *xCoordinatesBuffer, int *yCoordinatesBuffer,
|
||||
const int codesBufferSize, const bool useFullEditDistance, const int *codesSrc,
|
||||
const int codesBufferSize, const int bigramListPosition,
|
||||
const bool useFullEditDistance, const int *codesSrc,
|
||||
const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool,
|
||||
const digraph_t* const digraphs, const unsigned int digraphsSize) {
|
||||
|
@ -127,8 +128,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
replacementCodePoint;
|
||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
||||
useFullEditDistance, codesSrc + i + 1, codesRemain - i - 1,
|
||||
currentDepth + 1, codesDest + i, correction,
|
||||
bigramListPosition, useFullEditDistance, codesSrc + i + 1,
|
||||
codesRemain - i - 1, currentDepth + 1, codesDest + i, correction,
|
||||
queuePool, digraphs, digraphsSize);
|
||||
|
||||
// Copy the second char of the digraph in place, then continue processing on
|
||||
|
@ -137,9 +138,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR);
|
||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
|
||||
codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize,
|
||||
useFullEditDistance, codesSrc + i, codesRemain - i, currentDepth + 1,
|
||||
codesDest + i, correction, queuePool,
|
||||
digraphs, digraphsSize);
|
||||
bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i,
|
||||
currentDepth + 1, codesDest + i, correction, queuePool, digraphs,
|
||||
digraphsSize);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -160,14 +161,16 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
|
|||
}
|
||||
|
||||
getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer,
|
||||
startIndex + codesRemain, useFullEditDistance, correction,
|
||||
startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction,
|
||||
queuePool);
|
||||
}
|
||||
|
||||
// bigramListPosition is the offset in the file to the list of bigrams for the previous word.
|
||||
int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
||||
WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies) {
|
||||
const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords,
|
||||
int *frequencies) {
|
||||
|
||||
queuePool->clearAll();
|
||||
Correction* masterCorrection = correction;
|
||||
|
@ -177,8 +180,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
|||
int xCoordinatesBuffer[codesSize];
|
||||
int yCoordinatesBuffer[codesSize];
|
||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||
xCoordinatesBuffer, yCoordinatesBuffer,
|
||||
codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
|
||||
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||
queuePool, GERMAN_UMLAUT_DIGRAPHS,
|
||||
sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
|
||||
} else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
|
||||
|
@ -186,13 +189,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
|||
int xCoordinatesBuffer[codesSize];
|
||||
int yCoordinatesBuffer[codesSize];
|
||||
getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
|
||||
xCoordinatesBuffer, yCoordinatesBuffer,
|
||||
codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||
xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition,
|
||||
useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection,
|
||||
queuePool, FRENCH_LIGATURES_DIGRAPHS,
|
||||
sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0]));
|
||||
} else { // Normal processing
|
||||
getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize,
|
||||
useFullEditDistance, masterCorrection, queuePool);
|
||||
bigramListPosition, useFullEditDistance, masterCorrection, queuePool);
|
||||
}
|
||||
|
||||
PROF_START(20);
|
||||
|
@ -225,16 +228,16 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
|
|||
|
||||
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const int inputLength, const bool useFullEditDistance, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool) {
|
||||
const int inputLength, const int bigramListPosition, const bool useFullEditDistance,
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool) {
|
||||
|
||||
PROF_OPEN;
|
||||
PROF_START(0);
|
||||
PROF_END(0);
|
||||
|
||||
PROF_START(1);
|
||||
getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance,
|
||||
inputLength, correction, queuePool);
|
||||
getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition,
|
||||
useFullEditDistance, inputLength, correction, queuePool);
|
||||
PROF_END(1);
|
||||
|
||||
PROF_START(2);
|
||||
|
@ -305,15 +308,16 @@ static const char SPACE = ' ';
|
|||
|
||||
void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool) {
|
||||
const int bigramListPosition, const bool useFullEditDistance, const int inputLength,
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool) {
|
||||
initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
|
||||
getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
|
||||
true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
|
||||
getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction,
|
||||
queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX);
|
||||
}
|
||||
|
||||
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
||||
const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||
const int inputLength, const int bigramListPosition,
|
||||
Correction *correction, WordsPriorityQueuePool *queuePool,
|
||||
const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) {
|
||||
// TODO: Remove setCorrectionParams
|
||||
correction->setCorrectionParams(0, 0, 0,
|
||||
|
@ -333,8 +337,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
|
|||
int firstChildPos;
|
||||
|
||||
const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
|
||||
correction, &childCount, &firstChildPos, &siblingPos, queuePool,
|
||||
currentWordIndex);
|
||||
bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos,
|
||||
queuePool, currentWordIndex);
|
||||
// Update next sibling pos
|
||||
correction->setTreeSiblingPos(outputIndex, siblingPos);
|
||||
|
||||
|
@ -426,8 +430,10 @@ bool UnigramDictionary::getSubStringSuggestion(
|
|||
initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset],
|
||||
codes + offset, inputWordLength, correction);
|
||||
queuePool->clearSubQueue(currentWordIndex);
|
||||
getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
|
||||
queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
|
||||
// TODO: pass the bigram list for substring suggestion
|
||||
getSuggestionCandidates(useFullEditDistance, inputWordLength,
|
||||
0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */,
|
||||
MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
|
||||
if (DEBUG_DICT) {
|
||||
if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
|
||||
AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
|
||||
|
@ -757,15 +763,13 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
|
|||
// the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any
|
||||
// given level, as output into newCount when traversing this level's parent.
|
||||
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
||||
Correction *correction, int *newCount,
|
||||
const int bigramListPosition, Correction *correction, int *newCount,
|
||||
int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
||||
const int currentWordIndex) {
|
||||
if (DEBUG_DICT) {
|
||||
correction->checkState();
|
||||
}
|
||||
int pos = initialPos;
|
||||
// TODO: get this as an argument
|
||||
const int bigramListPosition = 0;
|
||||
|
||||
// Flags contain the following information:
|
||||
// - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits:
|
||||
|
@ -842,6 +846,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
|
|||
const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
|
||||
const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
|
||||
TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
|
||||
// The bigramListPosition is the offset in the file of the bigrams for the previous word,
|
||||
// or zero if we don't know of any bigrams for it.
|
||||
const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq);
|
||||
onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal,
|
||||
currentWordIndex);
|
||||
|
|
|
@ -74,34 +74,35 @@ class UnigramDictionary {
|
|||
bool isValidWord(const int32_t* const inWord, const int length) const;
|
||||
int getBigramPosition(int pos, unsigned short *word, int offset, int length) const;
|
||||
int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool,
|
||||
Correction *correction, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize,
|
||||
Correction *correction, const int *xcoordinates, const int *ycoordinates,
|
||||
const int *codes, const int codesSize, const int bigramListPosition,
|
||||
const bool useFullEditDistance, unsigned short *outWords, int *frequencies);
|
||||
virtual ~UnigramDictionary();
|
||||
|
||||
private:
|
||||
void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int inputLength,
|
||||
const bool useFullEditDistance, Correction *correction,
|
||||
const int bigramListPosition, const bool useFullEditDistance, Correction *correction,
|
||||
WordsPriorityQueuePool *queuePool);
|
||||
int getDigraphReplacement(const int *codes, const int i, const int codesSize,
|
||||
const digraph_t* const digraphs, const unsigned int digraphsSize) const;
|
||||
void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
|
||||
int *xCoordinatesBuffer, int *yCoordinatesBuffer,
|
||||
const int codesBufferSize, const bool useFullEditDistance, const int* codesSrc,
|
||||
int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize,
|
||||
const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc,
|
||||
const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
|
||||
WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
|
||||
const unsigned int digraphsSize);
|
||||
void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
|
||||
void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const bool useFullEditDistance,
|
||||
const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
|
||||
void getSuggestionCandidates(
|
||||
const int *ycoordinates, const int *codes, const int bigramListPosition,
|
||||
const bool useFullEditDistance, const int inputLength, Correction *correction,
|
||||
WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors,
|
||||
const int currentWordIndex);
|
||||
WordsPriorityQueuePool* queuePool);
|
||||
void getSuggestionCandidates(
|
||||
const bool useFullEditDistance, const int inputLength, const int bigramListPosition,
|
||||
Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion,
|
||||
const int maxErrors, const int currentWordIndex);
|
||||
void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo,
|
||||
const int *xcoordinates, const int *ycoordinates, const int *codes,
|
||||
const bool useFullEditDistance, const int inputLength,
|
||||
|
@ -113,9 +114,9 @@ class UnigramDictionary {
|
|||
bool needsToSkipCurrentNode(const unsigned short c,
|
||||
const int inputIndex, const int skipPos, const int depth);
|
||||
// Process a node by considering proximity, missing and excessive character
|
||||
bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
|
||||
int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool,
|
||||
const int currentWordIndex);
|
||||
bool processCurrentNode(const int initialPos, const int bigramListPosition,
|
||||
Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition,
|
||||
WordsPriorityQueuePool *queuePool, const int currentWordIndex);
|
||||
int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
|
||||
ProximityInfo *proximityInfo, unsigned short *word);
|
||||
int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
|
||||
|
|
Loading…
Reference in a new issue