am f8dbeb00: Merge "Clean up two word correction"

* commit 'f8dbeb00df29f59cea622ba28fdf8e3eba521a28': Clean up two word correction
2012-01-22 23:06:31 -08:00 · 2012-01-22 23:06:31 -08:00 · cf1c08a9de
parent 861e23af9c f8dbeb00df
commit cf1c08a9de
1 changed files with 67 additions and 51 deletions
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -389,24 +389,28 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
        const int  spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool) {
-    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        int inputCount = 0;
        if (spaceProximityPos >= 0) ++inputCount;
        if (missingSpacePos >= 0) ++inputCount;
        assert(inputCount <= 1);
    }
    const bool isSpaceProximity = spaceProximityPos >= 0;
    const int firstWordStartPos = 0;
-    const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
+    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-    int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord);
+
-    unsigned short* firstWord = 0;
+    const bool isSpaceProximity = spaceProximityPos >= 0;
-    int firstWordLength = 0;
+
    // First word
    const int firstInputWordStartPos = 0;
    const int firstInputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    int firstFreq = getMostFrequentWordLike(
            firstInputWordStartPos, firstInputWordLength, proximityInfo, mWord);
    unsigned short* firstOutputWord = 0;
    int firstOutputWordLength = 0;
    if (firstFreq > 0) {
-        firstWordLength = firstTypedWordLength;
+        firstOutputWordLength = firstInputWordLength;
-        firstWord = mWord;
+        firstOutputWord = mWord;
    } else {
        if (masterQueue->size() > 0) {
            double nsForMaster = masterQueue->getHighestNormalizedScore(
@ -416,80 +420,92 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
                return;
            }
        }
-        WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength);
+        WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstInputWordLength);
        if (firstWordQueue->size() < 1) {
            return;
        }
        int score = 0;
        const double ns = firstWordQueue->getHighestNormalizedScore(
-                proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score,
+                proximityInfo->getPrimaryInputWord(), firstInputWordLength,
-                &firstWordLength);
+                &firstOutputWord, &score, &firstOutputWordLength);
        // Two words correction won't be done if the score of the first word doesn't exceed the
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) {
            return;
        }
-        firstFreq = score >> (firstWordLength
+        firstFreq = score >> (firstOutputWordLength
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (firstFreq <= 0) {
        return;
    }
    const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
    const int secondWordLength = isSpaceProximity
            ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos
            || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength)
        return;
    const int newWordLength = firstWordLength + secondWordLength + 1;
    // Space proximity preparation
    //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
    //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue,
    //correction);
    //getSuggestionCandidates(useFullEditDistance, firstWordLength, correction, subQueue, false,
    //MAX_ERRORS_FOR_TWO_WORDS);
    // Allocating variable length array on stack
    unsigned short word[newWordLength];
    if (DEBUG_DICT) {
        AKLOGI("First freq: %d", firstFreq);
    }
-    for (int i = 0; i < firstWordLength; ++i) {
+    if (firstFreq <= 0 || firstOutputWordLength <= 0 || MAX_WORD_LENGTH <= firstOutputWordLength) {
-        word[i] = firstWord[i];
+        return;
    }
    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    int outputWordLength = 0;
    for (int i = 0; i < firstOutputWordLength; ++i) {
        outputWord[i] = firstOutputWord[i];
    }
    outputWord[firstOutputWordLength] = SPACE;
    outputWordLength = firstOutputWordLength + 1;
    //const int outputWordLength = firstOutputWordLength + secondWordLength + 1;
    // Space proximity preparation
    //WordsPriorityQueue *subQueue = queuePool->getSubQueue1();
    //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstOutputWordLength,
    //subQueue, correction);
    //getSuggestionCandidates(useFullEditDistance, firstOutputWordLength, correction, subQueue,
    //false, MAX_ERRORS_FOR_TWO_WORDS);
    // Second word
    const int secondInputWordLength = isSpaceProximity
            ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
    const int secondInputWordStartPos =
            isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
    int secondFreq = getMostFrequentWordLike(
            secondInputWordStartPos, secondInputWordLength, proximityInfo, mWord);
    unsigned short* secondOutputWord = 0;
    int secondOutputWordLength = 0;
    if (secondFreq > 0) {
        secondOutputWordLength = secondInputWordLength;
        secondOutputWord = mWord;
    }
    const int secondFreq = getMostFrequentWordLike(
            secondWordStartPos, secondWordLength, proximityInfo, mWord);
    if (DEBUG_DICT) {
-        AKLOGI("Second  freq:  %d", secondFreq);
+        AKLOGI("Second freq: %d", secondFreq);
    }
    if (secondFreq <= 0) return;
-    word[firstWordLength] = SPACE;
+    if (secondFreq <= 0 || secondOutputWordLength <= 0
-    for (int i = (firstWordLength + 1); i < newWordLength; ++i) {
+            || MAX_WORD_LENGTH <= (firstOutputWordLength + 1 + secondOutputWordLength)) {
-        word[i] = mWord[i - firstWordLength - 1];
+        return;
    }
    for (int i = 0; i < secondOutputWordLength; ++i) {
        outputWord[firstOutputWordLength + 1 + i] = secondOutputWord[i];
    }
    outputWordLength += secondOutputWordLength;
    // TODO: Remove initSuggestions and correction->setCorrectionParams
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
            useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
-    const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, word);
+    const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
    if (DEBUG_DICT) {
        AKLOGI("Split two words:  %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
    }
-    addWord(word, newWordLength, pairFreq, masterQueue);
+    addWord(outputWord, outputWordLength, pairFreq, masterQueue);
    return;
 }