From bd6ccdd5f0a1d3cdb4fde9a4a6a39c4c0cdd3daa Mon Sep 17 00:00:00 2001 From: satok Date: Mon, 23 Jan 2012 12:30:20 +0900 Subject: [PATCH] Clean up two word correction Change-Id: I5cd2697d7f61b81aff0c249df01479d86ad0fba5 --- native/src/unigram_dictionary.cpp | 118 +++++++++++++++++------------- 1 file changed, 67 insertions(+), 51 deletions(-) diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 2c5b9402a..e998ee486 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -389,24 +389,28 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) { - WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); - + if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { int inputCount = 0; if (spaceProximityPos >= 0) ++inputCount; if (missingSpacePos >= 0) ++inputCount; assert(inputCount <= 1); } - const bool isSpaceProximity = spaceProximityPos >= 0; - const int firstWordStartPos = 0; - const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; - int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord); - unsigned short* firstWord = 0; - int firstWordLength = 0; + WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); + + const bool isSpaceProximity = spaceProximityPos >= 0; + + // First word + const int firstInputWordStartPos = 0; + const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; + int firstFreq = getMostFrequentWordLike( + firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord); + unsigned short* firstOutputWord = 0; + int firstOutputWordLength = 0; if (firstFreq > 0) { - firstWordLength = firstTypedWordLength; - firstWord = mWord; + firstOutputWordLength = firstInputWordLength; + firstOutputWord = mWord; } else { if (masterQueue->size() > 0) { double nsForMaster = masterQueue->getHighestNormalizedScore( @@ -416,80 +420,92 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo return; } } - WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength); + WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength); if (firstWordQueue->size() < 1) { return; } int score = 0; const double ns = firstWordQueue->getHighestNormalizedScore( - proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score, - &firstWordLength); + proximityInfo->getPrimaryInputWord(), firstInputWordLength, + &firstOutputWord, &score, &firstOutputWordLength); // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { return; } - firstFreq = score >> (firstWordLength + firstFreq = score >> (firstOutputWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } - if (firstFreq <= 0) { - return; - } - - const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; - const int secondWordLength = isSpaceProximity - ? (inputLength - spaceProximityPos - 1) - : (inputLength - missingSpacePos); - - if (inputLength >= MAX_WORD_LENGTH) return; - - if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos - || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) - return; - - const int newWordLength = firstWordLength + secondWordLength + 1; - - // Space proximity preparation - //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); - //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, - //correction); - //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false, - //MAX_ERRORS_FOR_TWO_WORDS); - - // Allocating variable length array on stack - unsigned short word[newWordLength]; if (DEBUG_DICT) { AKLOGI("First freq: %d", firstFreq); } - for (int i = 0; i < firstWordLength; ++i) { - word[i] = firstWord[i]; + if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) { + return; + } + + // Allocating fixed length array on stack + unsigned short outputWord[MAX_WORD_LENGTH]; + int outputWordLength = 0; + + for (int i = 0; i < firstOutputWordLength; ++i) { + outputWord[i] = firstOutputWord[i]; + } + + outputWord[firstOutputWordLength] = SPACE; + outputWordLength = firstOutputWordLength + 1; + + //const int outputWordLength = firstOutputWordLength + secondWordLength + 1; + // Space proximity preparation + //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); + //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength, + //subQueue, correction); + //getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue, + //false, MAX_ERRORS_FOR_TWO_WORDS); + + // Second word + const int secondInputWordLength = isSpaceProximity + ? (inputLength - spaceProximityPos - 1) + : (inputLength - missingSpacePos); + const int secondInputWordStartPos = + isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; + int secondFreq = getMostFrequentWordLike( + secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord); + unsigned short* secondOutputWord = 0; + int secondOutputWordLength = 0; + + if (secondFreq > 0) { + secondOutputWordLength = secondInputWordLength; + secondOutputWord = mWord; } - const int secondFreq = getMostFrequentWordLike( - secondWordStartPos, secondWordLength, proximityInfo, mWord); if (DEBUG_DICT) { - AKLOGI("Second freq: %d", secondFreq); + AKLOGI("Second freq: %d", secondFreq); } - if (secondFreq <= 0) return; - word[firstWordLength] = SPACE; - for (int i = (firstWordLength + 1); i < newWordLength; ++i) { - word[i] = mWord[i - firstWordLength - 1]; + if (secondFreq <= 0 || secondOutputWordLength <= 0 + || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) { + return; } + for (int i = 0; i < secondOutputWordLength; ++i) { + outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i]; + } + + outputWordLength += secondOutputWordLength; + // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS); - const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word); + const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } - addWord(word, newWordLength, pairFreq, masterQueue); + addWord(outputWord, outputWordLength, pairFreq, masterQueue); return; }