am a191afb7: Merge "Implement multi words suggestions step1"

* commit 'a191afb78d47022a1cc4474ffa7d7ab872a9270b': Implement multi words suggestions step1
2012-01-30 01:06:41 -08:00 · 2012-01-30 01:06:41 -08:00 · f800eb0889
parent 3c505f2999 a191afb78d
commit f800eb0889
5 changed files with 132 additions and 110 deletions
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
    const bool capitalizedWordDemotion =
            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;
    if (DEBUG_DICT_FULL) {
        AKLOGI("Two words: %c, %c, %d",
                word[0], word[firstWordLength + 1], capitalizedWordDemotion);
    }
    if (firstWordLength == 0 || secondWordLength == 0) {
        return 0;
    }
@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
    }
    if (DEBUG_CORRECTION_FREQ) {
        AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength,
                secondWordLength, capitalizedWordDemotion, totalFreq);
        DUMP_WORD(word, firstWordLength);
    }
    return totalFreq;
 }
--- a/native/src/defines.h
+++ b/native/src/defines.h
@ -216,15 +216,15 @@ static void prof_out(void) {
 #define SUB_QUEUE_MAX_WORDS 1
 #define SUB_QUEUE_MAX_COUNT 10
 #define SUB_QUEUE_MIN_WORD_LENGTH 4
-#define SUB_QUEUE_MAX_WORD_INDEX 2
+#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2
 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22
 #define MAX_DEPTH_MULTIPLIER 3
-#define FIRST_WORD_INDEX 1
+#define FIRST_WORD_INDEX 0
-#define SECOND_WORD_INDEX 2
+#define SECOND_WORD_INDEX 1
 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
 // word in the dictionary
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -224,15 +224,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    // Multiple word suggestions
    if (SUGGEST_MULTIPLE_WORDS
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
        for (int i = 1; i < inputLength; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest multiple words %d", i);
            }
        getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
-                    useFullEditDistance, inputLength, i, correction, queuePool,
+                useFullEditDistance, inputLength, correction, queuePool,
                hasAutoCorrectionCandidate);
    }
    }
    PROF_END(5);
    PROF_START(6);
@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
    int wordLength;
    unsigned short* wordPointer;
-    if ((currentWordIndex == 1) && addToMasterQueue) {
+    if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
        WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
        if (finalFreq != NOT_A_FREQUENCY) {
@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion(
        const int inputWordStartPos, const int inputWordLength,
        const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
        int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
    if (DEBUG_DICT) {
        assert(currentWordIndex >= 1);
    }
    unsigned short* tempOutputWord = 0;
-    int tempOutputWordLength = 0;
+    int nextWordLength = 0;
    // TODO: Optimize init suggestion
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);
@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion(
    int freq = getMostFrequentWordLike(
            inputWordStartPos, inputWordLength, proximityInfo, mWord);
    if (freq > 0) {
-        tempOutputWordLength = inputWordLength;
+        nextWordLength = inputWordLength;
        tempOutputWord = mWord;
    } else if (!hasAutoCorrectionCandidate) {
        if (inputWordStartPos > 0) {
@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion(
            getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
                    queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
            if (DEBUG_DICT) {
-                if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
+                if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
                    AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
                    for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
                        queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
@ -415,59 +407,122 @@ bool UnigramDictionary::getSubStringSuggestion(
        int score = 0;
        const double ns = queue->getHighestNormalizedScore(
                proximityInfo->getPrimaryInputWord(), inputWordLength,
-                &tempOutputWord, &score, &tempOutputWordLength);
+                &tempOutputWord, &score, &nextWordLength);
        if (DEBUG_DICT) {
            AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
        }
        // Two words correction won't be done if the score of the first word doesn't exceed the
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
-                || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
+                || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
            return false;
        }
-        freq = score >> (tempOutputWordLength
+        freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (DEBUG_DICT) {
-        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
+        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
-                , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
+                , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
                wordLengthArray[0]);
    }
-    if (freq <= 0 || tempOutputWordLength <= 0
+    if (freq <= 0 || nextWordLength <= 0
-            || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
+            || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
        return false;
    }
-    for (int i = 0; i < tempOutputWordLength; ++i) {
+    for (int i = 0; i < nextWordLength; ++i) {
        outputWord[outputWordStartPos + i] = tempOutputWord[i];
    }
    // Put output values
-    freqArray[currentWordIndex - 1] = freq;
+    freqArray[currentWordIndex] = freq;
    // TODO: put output length instead of input length
-    wordLengthArray[currentWordIndex - 1] = inputWordLength;
+    wordLengthArray[currentWordIndex] = inputWordLength;
-    *outputWordLength = outputWordStartPos + tempOutputWordLength;
+    const int tempOutputWordLength = outputWordStartPos + nextWordLength;
    if (outputWordLength) {
        *outputWordLength = tempOutputWordLength;
    }
    if ((inputWordStartPos + inputWordLength) < inputLength) {
-        if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
+        if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
            return false;
        }
        outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
        if (outputWordLength) {
            ++*outputWordLength;
-    } else if (currentWordIndex >= 2) {
+        }
    } else if (currentWordIndex >= 1) {
        // TODO: Handle 3 or more words
        const int pairFreq = correction->getFreqForSplitTwoWords(
                freqArray, wordLengthArray, isSpaceProximity, outputWord);
        if (DEBUG_DICT) {
-            AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
+            AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq,
-                    inputLength);
+                    inputLength, wordLengthArray[0]);
        }
-        addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
+        addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
    }
    return true;
 }
 // Tries every split point of the typed input and asks getSubStringSuggestion() for a first
 // word covering input positions [0, i) and a second word covering the remainder.
 //
 // For each split position i in [1, inputLength) the second word is attempted in two ways:
 //   - "missing space": the second word starts at i, so no typed character is dropped;
 //   - "mistyped space": the character at i is skipped, and this is only tried when its
 //     touch coordinates satisfy proximityInfo->hasSpaceProximity().
 //
 // freqArray, wordLengthArray and outputWord are caller-provided buffers that are handed
 // through to getSubStringSuggestion() unchanged.
 //
 // NOTE(review): despite the "Rec" suffix this body does not call itself. startInputPos and
 // outputWordLength are never read here, and startWordIndex is only used by the guard below;
 // the loop always starts from input position 0 with FIRST_WORD_INDEX. Presumably these
 // parameters are placeholders for a later 3-or-more-words version -- confirm.
 void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
        const int outputWordLength, int *freqArray, int* wordLengthArray,
        unsigned short* outputWord) {
    // Nothing to do unless at least one more word slot remains after startWordIndex.
    if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
        // Return if the last word index
        return;
    }
    // i is the split position: the number of input characters given to the first word.
    for (int i = 1; i < inputLength; ++i) {
        // Receives the output length reported by getSubStringSuggestion() for the first
        // word; it is then used as the output start position of the second word.
        int tempOutputWordLength = 0;
        // First word
        int inputWordStartPos = 0;
        int inputWordLength = i;
        if (DEBUG_CORRECTION_FREQ) {
            AKLOGI("Two words, %d", inputWordLength);
        }
        // If no first word is found for this split, skip both second-word attempts.
        if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
                freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) {
            continue;
        }
        // Second word
        // Missing space
        inputWordStartPos = i;
        inputWordLength = inputLength - i;
        // The return value is ignored, and 0 is passed for the outputWordLength pointer,
        // i.e. the resulting length is not requested.
        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
                false /* missing space */, freqArray, wordLengthArray, outputWord,
                0);
        // Mistyped space
        // Drop the character at position i from the second word.
        ++inputWordStartPos;
        --inputWordLength;
        // True when i == inputLength - 1: nothing is left once that character is dropped.
        if (inputWordLength <= 0) {
            continue;
        }
        // Touch coordinates of the dropped character (index inputWordStartPos - 1 == i).
        const int x = xcoordinates[inputWordStartPos - 1];
        const int y = ycoordinates[inputWordStartPos - 1];
        if (!proximityInfo->hasSpaceProximity(x, y)) {
            continue;
        }
        // Same call as the missing-space attempt, but with the shortened second word and
        // the space-proximity flag set; the return value is ignored here as well.
        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
                true /* mistyped space */, freqArray, wordLengthArray, outputWord,
                0);
    }
 }
 void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
-        const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
@ -475,51 +530,21 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
    }
    if (DEBUG_DICT) {
        AKLOGI("--- Suggest multiple words");
    }
    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
-    int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
+    int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
-    int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
+    int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
-    int outputWordLength = 0;
+    const int outputWordLength = 0;
-
+    const int startInputPos = 0;
-    // First word
+    const int startWordIndex = 0;
-    int inputWordStartPos = 0;
+    getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
-    int inputWordLength = wordDivideIndex;
+            useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
-    if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
+            startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
-            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
+            outputWord);
            FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
            freqArray, wordLengthArray, outputWord, &outputWordLength)) {
        return;
    }
    const int tempOutputWordLength = outputWordLength;
    // Second word
    // Missing space
    inputWordStartPos = wordDivideIndex;
    inputWordLength = inputLength - wordDivideIndex;
    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
    // Mistyped space
    ++inputWordStartPos;
    --inputWordLength;
    if (inputWordLength <= 0) {
        return;
    }
    const int x = xcoordinates[inputWordStartPos - 1];
    const int y = ycoordinates[inputWordStartPos - 1];
    if (!proximityInfo->hasSpaceProximity(x, y)) {
        return;
    }
    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
 }
 // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -103,7 +103,7 @@ class UnigramDictionary {
            const int currentWordIndex);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
-            const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
+            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
@ -127,6 +127,13 @@ class UnigramDictionary {
            const int inputWordStartPos, const int inputWordLength,
            const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
            int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
    void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex,
            const int outputWordLength, int *freqArray, int* wordLengthArray,
            unsigned short* outputWord);
    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@ -27,11 +27,10 @@ class WordsPriorityQueuePool {
 public:
    WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
        mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
-        for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
+        for (int i = 0, subQueueBufOffset = 0;
                i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
                ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
-            mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
+            mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
            mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
        }
    }
@ -44,7 +43,7 @@ class WordsPriorityQueuePool {
    }
    WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) {
-        if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) {
+        if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
            return 0;
        }
        if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
@ -53,30 +52,21 @@ class WordsPriorityQueuePool {
            }
            return 0;
        }
-        // TODO: Come up with more generic pool
+        return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength];
        if (wordIndex == 1) {
            return mSubQueues1[inputWordLength];
        } else if (wordIndex == 2) {
            return mSubQueues2[inputWordLength];
        } else {
            return 0;
        }
    }
    inline void clearAll() {
        mMasterQueue->clear();
-        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+        for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) {
-            mSubQueues1[i]->clear();
+            clearSubQueue(i);
            mSubQueues2[i]->clear();
        }
    }
    inline void clearSubQueue(const int wordIndex) {
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
-            if (wordIndex == 1) {
+            WordsPriorityQueue* queue = getSubQueue(wordIndex, i);
-                mSubQueues1[i]->clear();
+            if (queue) {
-            } else if (wordIndex == 2) {
+                queue->clear();
                mSubQueues2[i]->clear();
            }
        }
    }
@ -84,17 +74,16 @@ class WordsPriorityQueuePool {
    void dumpSubQueue1TopSuggestions() {
        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
-            mSubQueues1[i]->dumpTopWord();
+            getSubQueue(0, i)->dumpTopWord();
        }
    }
 private:
    WordsPriorityQueue* mMasterQueue;
-    WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
+    WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
    WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
    char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
-    char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+    char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
-    char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
+                      * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
 };
 }