From 4d9b202c4009352352ae98f8b13fe6330d102ba6 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 23 Apr 2012 19:25:28 +0900 Subject: [PATCH] Pass the bigram list position from the top level The position itself is still a const int = 0 until we have the previous word passed to the function. This basically does the plumbing. Bug: 6313806 Change-Id: Ib58995f334fe93e3ff5704d7c79f332017f101ac --- native/jni/src/dictionary.h | 8 +++- native/jni/src/unigram_dictionary.cpp | 64 +++++++++++++++------------ native/jni/src/unigram_dictionary.h | 27 +++++------ 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h index dea5763d0..5b9ddb3e9 100644 --- a/native/jni/src/dictionary.h +++ b/native/jni/src/dictionary.h @@ -35,9 +35,13 @@ class Dictionary { int getSuggestions(ProximityInfo *proximityInfo, int *xcoordinates, int *ycoordinates, int *codes, int codesSize, bool useFullEditDistance, unsigned short *outWords, int *frequencies) { + // bigramListPosition is, as an int, the offset of the bigram list in the file. + // If none, it's zero. + // TODO: get this from the bigram dictionary instance + const int bigramListPosition = 0; return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool, - mCorrection, xcoordinates, ycoordinates, codes, - codesSize, useFullEditDistance, outWords, frequencies); + mCorrection, xcoordinates, ycoordinates, codes, codesSize, bigramListPosition, + useFullEditDistance, outWords, frequencies); } int getBigrams(const int32_t *word, int length, int *codes, int codesSize, diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index a3eda0061..0c759d438 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -98,7 +98,8 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, int *yCoordinatesBuffer, - const int codesBufferSize, const bool useFullEditDistance, const int *codesSrc, + const int codesBufferSize, const int bigramListPosition, + const bool useFullEditDistance, const int *codesSrc, const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, WordsPriorityQueuePool *queuePool, const digraph_t* const digraphs, const unsigned int digraphsSize) { @@ -127,8 +128,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit replacementCodePoint; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, - useFullEditDistance, codesSrc + i + 1, codesRemain - i - 1, - currentDepth + 1, codesDest + i, correction, + bigramListPosition, useFullEditDistance, codesSrc + i + 1, + codesRemain - i - 1, currentDepth + 1, codesDest + i, correction, queuePool, digraphs, digraphsSize); // Copy the second char of the digraph in place, then continue processing on @@ -137,9 +138,9 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit memcpy(codesDest + i, codesSrc + i, BYTES_IN_ONE_CHAR); getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, - useFullEditDistance, codesSrc + i, codesRemain - i, currentDepth + 1, - codesDest + i, correction, queuePool, - digraphs, digraphsSize); + bigramListPosition, useFullEditDistance, codesSrc + i, codesRemain - i, + currentDepth + 1, codesDest + i, correction, queuePool, digraphs, + digraphsSize); return; } } @@ -160,14 +161,16 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit } getWordSuggestions(proximityInfo, xCoordinatesBuffer, yCoordinatesBuffer, codesBuffer, - startIndex + codesRemain, useFullEditDistance, correction, + startIndex + codesRemain, bigramListPosition, useFullEditDistance, correction, queuePool); } +// bigramListPosition is the offset in the file to the list of bigrams for the previous word. int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, - const bool useFullEditDistance, unsigned short *outWords, int *frequencies) { + const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords, + int *frequencies) { queuePool->clearAll(); Correction* masterCorrection = correction; @@ -177,8 +180,8 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, - codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition, + useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, GERMAN_UMLAUT_DIGRAPHS, sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0])); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { @@ -186,13 +189,13 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, - xCoordinatesBuffer, yCoordinatesBuffer, - codesSize, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, + xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramListPosition, + useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, FRENCH_LIGATURES_DIGRAPHS, sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0])); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, - useFullEditDistance, masterCorrection, queuePool); + bigramListPosition, useFullEditDistance, masterCorrection, queuePool); } PROF_START(20); @@ -225,16 +228,16 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const int inputLength, const bool useFullEditDistance, Correction *correction, - WordsPriorityQueuePool *queuePool) { + const int inputLength, const int bigramListPosition, const bool useFullEditDistance, + Correction *correction, WordsPriorityQueuePool *queuePool) { PROF_OPEN; PROF_START(0); PROF_END(0); PROF_START(1); - getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, - inputLength, correction, queuePool); + getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramListPosition, + useFullEditDistance, inputLength, correction, queuePool); PROF_END(1); PROF_START(2); @@ -305,15 +308,16 @@ static const char SPACE = ' '; void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, - const bool useFullEditDistance, const int inputLength, Correction *correction, - WordsPriorityQueuePool *queuePool) { + const int bigramListPosition, const bool useFullEditDistance, const int inputLength, + Correction *correction, WordsPriorityQueuePool *queuePool) { initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); - getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, - true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); + getSuggestionCandidates(useFullEditDistance, inputLength, bigramListPosition, correction, + queuePool, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS, FIRST_WORD_INDEX); } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, - const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, + const int inputLength, const int bigramListPosition, + Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex) { // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, @@ -333,8 +337,8 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, - correction, &childCount, &firstChildPos, &siblingPos, queuePool, - currentWordIndex); + bigramListPosition, correction, &childCount, &firstChildPos, &siblingPos, + queuePool, currentWordIndex); // Update next sibling pos correction->setTreeSiblingPos(outputIndex, siblingPos); @@ -426,8 +430,10 @@ bool UnigramDictionary::getSubStringSuggestion( initSuggestions(proximityInfo, &xcoordinates[offset], &ycoordinates[offset], codes + offset, inputWordLength, correction); queuePool->clearSubQueue(currentWordIndex); - getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, - queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); + // TODO: pass the bigram list for substring suggestion + getSuggestionCandidates(useFullEditDistance, inputWordLength, + 0 /* bigramListPosition */, correction, queuePool, false /* doAutoCompletion */, + MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); if (DEBUG_DICT) { if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); @@ -757,15 +763,13 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // the current node in nextSiblingPosition. Thus, the caller must keep count of the nodes at any // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, - Correction *correction, int *newCount, + const int bigramListPosition, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, const int currentWordIndex) { if (DEBUG_DICT) { correction->checkState(); } int pos = initialPos; - // TODO: get this as an argument - const int bigramListPosition = 0; // Flags contain the following information: // - Address type (MASK_GROUP_ADDRESS_TYPE) on two bits: @@ -842,6 +846,8 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); + // The bigramListPosition is the offset in the file of the bigrams for the previous word, + // or zero if we don't know of any bigrams for it. const int probability = BinaryFormat::getProbability(bigramListPosition, unigramFreq); onTerminal(probability, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal, currentWordIndex); diff --git a/native/jni/src/unigram_dictionary.h b/native/jni/src/unigram_dictionary.h index b09c0006d..0cc59bac8 100644 --- a/native/jni/src/unigram_dictionary.h +++ b/native/jni/src/unigram_dictionary.h @@ -74,34 +74,35 @@ class UnigramDictionary { bool isValidWord(const int32_t* const inWord, const int length) const; int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; int getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, - Correction *correction, const int *xcoordinates, - const int *ycoordinates, const int *codes, const int codesSize, + Correction *correction, const int *xcoordinates, const int *ycoordinates, + const int *codes, const int codesSize, const int bigramListPosition, const bool useFullEditDistance, unsigned short *outWords, int *frequencies); virtual ~UnigramDictionary(); private: void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, - const bool useFullEditDistance, Correction *correction, + const int bigramListPosition, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool); int getDigraphReplacement(const int *codes, const int i, const int codesSize, const digraph_t* const digraphs, const unsigned int digraphsSize) const; void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, - int *xCoordinatesBuffer, int *yCoordinatesBuffer, - const int codesBufferSize, const bool useFullEditDistance, const int* codesSrc, + int *xCoordinatesBuffer, int *yCoordinatesBuffer, const int codesBufferSize, + const int bigramListPosition, const bool useFullEditDistance, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs, const unsigned int digraphsSize); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, - const int *ycoordinates, const int *codes, const bool useFullEditDistance, - const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); - void getSuggestionCandidates( + const int *ycoordinates, const int *codes, const int bigramListPosition, const bool useFullEditDistance, const int inputLength, Correction *correction, - WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors, - const int currentWordIndex); + WordsPriorityQueuePool* queuePool); + void getSuggestionCandidates( + const bool useFullEditDistance, const int inputLength, const int bigramListPosition, + Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, + const int maxErrors, const int currentWordIndex); void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, @@ -113,9 +114,9 @@ class UnigramDictionary { bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character - bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, - int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, - const int currentWordIndex); + bool processCurrentNode(const int initialPos, const int bigramListPosition, + Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, + WordsPriorityQueuePool *queuePool, const int currentWordIndex); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,