am 67e13976: Merge "Store suggestions for each input length for missing space algorithm etc."

* commit '67e13976b78619ec7bc45b5377a9f5da2534cfd5': Store suggestions for each input length for missing space algorithm etc.
2012-01-16 00:20:06 -08:00 · 2012-01-16 00:20:06 -08:00 · 53c4248c6f
parent 65b489cd76 67e13976b7
commit 53c4248c6f
7 changed files with 99 additions and 65 deletions
--- a/native/src/correction.cpp
+++ b/native/src/correction.cpp
@ -214,21 +214,11 @@ int Correction::goDownTree(
    return mOutputIndex;
 }

-// TODO: remove
-int Correction::getOutputIndex() {
-    return mOutputIndex;
-}
-
 // TODO: remove
 int Correction::getInputIndex() {
    return mInputIndex;
 }

-// TODO: remove
-bool Correction::needsToTraverseAllNodes() {
-    return mNeedsToTraverseAllNodes;
-}
-
 void Correction::incrementInputIndex() {
    ++mInputIndex;
 }
@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
            mWord, mOutputIndex + 1);
 }

-// TODO: inline?
 Correction::CorrectionType Correction::processSkipChar(
        const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    addCharToCurrentWord(c);
-    if (needsToTraverseAllNodes() && isTerminal) {
-        mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
-        mTerminalOutputIndex = mOutputIndex;
+    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
+    mTerminalOutputIndex = mOutputIndex;
+    if (mNeedsToTraverseAllNodes && isTerminal) {
        incrementOutputIndex();
        return TRAVERSE_ALL_ON_TERMINAL;
    } else {
@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
    }
 }

+Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
+    // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
+    mTerminalInputIndex = mInputIndex;
+    mTerminalOutputIndex = mOutputIndex;
+    return UNRELATED;
+}
+
 inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
    return type == ProximityInfo::EQUIVALENT_CHAR;
 }
@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        const int32_t c, const bool isTerminal) {
    const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    if (correctionCount > mMaxErrors) {
-        return UNRELATED;
+        return processUnrelatedCorrectionType();
    }

    // TODO: Change the limit if we'll allow two or more corrections
@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
            }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
        }
    }

@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
            }
-            return UNRELATED;
+            return processUnrelatedCorrectionType();
        }
    } else if (secondTransposing) {
        // If inputIndex is greater than mInputLength, that means there is no
@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        }
        return ON_TERMINAL;
    } else {
+        mTerminalInputIndex = mInputIndex - 1;
+        mTerminalOutputIndex = mOutputIndex - 1;
        return NOT_ON_TERMINAL;
    }
 }
--- a/native/src/correction.h
+++ b/native/src/correction.h
@ -48,7 +48,6 @@ class Correction {
    void checkState();
    bool initProcessState(const int index);

-    int getOutputIndex();
    int getInputIndex();

    virtual ~Correction();
@ -115,11 +114,11 @@ class Correction {
 private:
    inline void incrementInputIndex();
    inline void incrementOutputIndex();
-    inline bool needsToTraverseAllNodes();
    inline void startToTraverseAllNodes();
    inline bool isQuote(const unsigned short c);
    inline CorrectionType processSkipChar(
            const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
+    inline CorrectionType processUnrelatedCorrectionType();
    inline void addCharToCurrentWord(const int32_t c);

    const int TYPED_LETTER_MULTIPLIER;
--- a/native/src/defines.h
+++ b/native/src/defines.h
@ -22,9 +22,23 @@
 #include <cutils/log.h>
 #define AKLOGE ALOGE
 #define AKLOGI ALOGI
+
+#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
+
+static char charBuf[50];
+
+static void dumpWord(const unsigned short* word, const int length) {
+    for (int i = 0; i < length; ++i) {
+        charBuf[i] = word[i];
+    }
+    charBuf[length] = 0;
+    AKLOGI("[ %s ]", charBuf);
+}
+
 #else
 #define AKLOGE(fmt, ...)
 #define AKLOGI(fmt, ...)
+#define DUMP_WORD(word, length)
 #endif

 #ifdef FLAG_DO_PROFILE
@ -106,18 +120,6 @@ static void prof_out(void) {
 #define DEBUG_CORRECTION_FREQ true
 #define DEBUG_WORDS_PRIORITY_QUEUE true

-#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)
-
-static char charBuf[50];
-
-static void dumpWord(const unsigned short* word, const int length) {
-    for (int i = 0; i < length; ++i) {
-        charBuf[i] = word[i];
-    }
-    charBuf[length] = 0;
-    AKLOGI("[ %s ]", charBuf);
-}
-
 #else // FLAG_DBG

 #define DEBUG_DICT false
@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
 #define DEBUG_CORRECTION_FREQ false
 #define DEBUG_WORDS_PRIORITY_QUEUE false

-#define DUMP_WORD(word, length)

 #endif // FLAG_DBG

@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) {

 // Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
 // for better performance.
-#define SUB_QUEUE_MAX_WORDS 5
+// Holds up to 1 candidate for each word
+#define SUB_QUEUE_MAX_WORDS 1
 #define SUB_QUEUE_MAX_COUNT 10

 #define MAX_DEPTH_MULTIPLIER 3
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,

    PROF_OPEN;
    PROF_START(0);
-    // Note: This line is intentionally left blank
+    queuePool->clearAll();
    PROF_END(0);

    PROF_START(1);
@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
        }
    }
    PROF_END(6);
+    if (DEBUG_WORDS_PRIORITY_QUEUE) {
+        queuePool->dumpSubQueue1TopSuggestions();
+    }
 }

 void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
-        const int *yCoordinates, const int *codes, const int inputLength,
-        WordsPriorityQueue *queue, Correction *correction) {
+        const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
    if (DEBUG_DICT) {
        AKLOGI("initSuggest");
    }
    proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
-    if (queue) {
-        queue->clear();
-    }
    const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
    correction->initCorrection(proximityInfo, inputLength, maxDepth);
 }
@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, Correction *correction,
        WordsPriorityQueuePool *queuePool) {
-    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
-    initSuggestions(
-            proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
-    getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
+    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
+    getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
 }

 void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
-        const int inputLength, Correction *correction, WordsPriorityQueue *queue,
+        const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
        const bool doAutoCompletion, const int maxErrors) {
    // TODO: Remove setCorrectionParams
    correction->setCorrectionParams(0, 0, 0,
@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
            int firstChildPos;

            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
-                    correction, &childCount, &firstChildPos, &siblingPos, queue);
+                    correction, &childCount, &firstChildPos, &siblingPos, queuePool);
            // Update next sibling pos
            correction->setTreeSiblingPos(outputIndex, siblingPos);

@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons

 inline void UnigramDictionary::onTerminal(const int freq,
        const TerminalAttributes& terminalAttributes, Correction *correction,
-        WordsPriorityQueue *queue) {
+        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
+    const int inputIndex = correction->getInputIndex();
+    const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
+    if (!addToMasterQueue && !addToSubQueue) {
+        return;
+    }
+    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
+    WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
    int wordLength;
    unsigned short* wordPointer;
    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
    if (finalFreq >= 0) {
        if (!terminalAttributes.isShortcutOnly()) {
-            addWord(wordPointer, wordLength, finalFreq, queue);
+            if (addToMasterQueue) {
+                addWord(wordPointer, wordLength, finalFreq, masterQueue);
+            }
+            // TODO: Check the validity of "inputIndex == wordLength"
+            //if (addToSubQueue && inputIndex == wordLength) {
+            if (addToSubQueue) {
+                addWord(wordPointer, wordLength, finalFreq, subQueue);
+            }
        }
+        // Please note that the shortcut candidates will be added to the master queue only.
+        if (!addToMasterQueue) {
+            return;
+        }
+
+        // From here, below is the code to add shortcut candidates.
        TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
        while (iterator.hasNextShortcutTarget()) {
            // TODO: addWord only supports weak ordering, meaning we have no means to control the
@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
            uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                    MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
-            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
+            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
        }
    }
 }
@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
    }

    // TODO: Remove initSuggestions and correction->setCorrectionParams
-    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
-            0 /* do not clear queue */, correction);
+    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);

    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
 // given level, as output into newCount when traversing this level's parent.
 inline bool UnigramDictionary::processCurrentNode(const int initialPos,
        Correction *correction, int *newCount,
-        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
+        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
    if (DEBUG_DICT) {
        correction->checkState();
    }
@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
    } while (NOT_A_CHARACTER != c);

    if (isTerminalNode) {
-        if (needsToInvokeOnTerminal) {
-            // The frequency should be here, because we come here only if this is actually
-            // a terminal node, and we are on its last char.
-            const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
-            const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
-            const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
-            TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
-            onTerminal(freq, terminalAttributes, correction, queue);
-        }
+        // The frequency should be here, because we come here only if this is actually
+        // a terminal node, and we are on its last char.
+        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
+        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
+        const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
+        TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
+        onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);

        // If there are more chars in this node, then this virtual node has children.
        // If we are on the last char, this virtual node has children if this node has.
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -93,14 +93,13 @@ class UnigramDictionary {
        const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
        WordsPriorityQueuePool* queuePool);
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
-            const int *ycoordinates, const int *codes, const int codesSize,
-            WordsPriorityQueue *queue, Correction *correction);
+            const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
    void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
    void getSuggestionCandidates(
            const bool useFullEditDistance, const int inputLength, Correction *correction,
-            WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
+            WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@ -114,12 +113,12 @@ class UnigramDictionary {
            const int inputLength, const int spaceProximityPos, Correction *correction,
            WordsPriorityQueuePool* queuePool);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
-            Correction *correction, WordsPriorityQueue *queue);
+            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
    bool needsToSkipCurrentNode(const unsigned short c,
            const int inputIndex, const int skipPos, const int depth);
    // Process a node by considering proximity, missing and excessive character
    bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
-            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
+            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
    int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
            ProximityInfo *proximityInfo, unsigned short *word);
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
--- a/native/src/words_priority_queue.h
+++ b/native/src/words_priority_queue.h
@ -128,6 +128,13 @@ class WordsPriorityQueue {
        }
    }

+    void dumpTopWord() {
+        if (size() <= 0) {
+            return;
+        }
+        DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength);
+    }
+
 private:
    struct wordComparator {
        bool operator ()(SuggestedWord * left, SuggestedWord * right) {
--- a/native/src/words_priority_queue_pool.h
+++ b/native/src/words_priority_queue_pool.h
@ -58,6 +58,21 @@ class WordsPriorityQueuePool {
        return mSubQueues2[id];
    }

+    inline void clearAll() {
+        mMasterQueue->clear();
+        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+            mSubQueues1[i]->clear();
+            mSubQueues2[i]->clear();
+        }
+    }
+
+    void dumpSubQueue1TopSuggestions() {
+        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
+        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
+            mSubQueues1[i]->dumpTopWord();
+        }
+    }
+
 private:
    WordsPriorityQueue* mMasterQueue;
    WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];