From 28bd03b9f5ffcc5fa8f0b1ac57781d422c4e6731 Mon Sep 17 00:00:00 2001
From: satok <satok@google.com>
Date: Fri, 3 Dec 2010 16:39:16 +0900
Subject: [PATCH] Breakdown getWordRec

Change-Id: I8556efb1dd053eff9a9681971cbe1014abf0333f
---
 native/src/unigram_dictionary.cpp | 129 +++++++++++++++---------------
 native/src/unigram_dictionary.h   |   6 ++
 2 files changed, 72 insertions(+), 63 deletions(-)

diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp
index e4edc5ab6..b3479738e 100644
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -109,9 +109,7 @@ void UnigramDictionary::registerNextLetter(
     }
 }
 
-bool
-UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
-{
+bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
     word[length] = 0;
     if (DEBUG_DICT) {
         char s[length + 1];
@@ -147,8 +145,7 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
     return false;
 }
 
-unsigned short
-UnigramDictionary::toLowerCase(unsigned short c) {
+unsigned short UnigramDictionary::toLowerCase(unsigned short c) {
     if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
         c = BASE_CHARS[c];
     }
@@ -160,9 +157,7 @@ UnigramDictionary::toLowerCase(unsigned short c) {
     return c;
 }
 
-bool
-UnigramDictionary::sameAsTyped(unsigned short *word, int length)
-{
+bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
     if (length != mInputLength) {
         return false;
     }
@@ -180,15 +175,10 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length)
 static const char QUOTE = '\'';
 
 // snr : frequency?
-void
-UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
-        int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
-{
+void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes,
+        int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) {
     // Optimization: Prune out words that are too long compared to how much was typed.
-    if (depth > maxDepth) {
-        return;
-    }
-    if (diffs > mMaxEditDistance) {
+    if (depth > maxDepth || diffs > mMaxEditDistance) {
         return;
     }
     // get the count of nodes and increment pos.
@@ -196,71 +186,59 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
     int *currentChars = NULL;
     // If inputIndex is greater than mInputLength, that means there are no proximity chars.
     if (mInputLength <= inputIndex) {
-        completion = true;
+        traverseAllNodes = true;
     } else {
         currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
     }
 
-    for (int i = 0; i < count; i++) {
+    for (int i = 0; i < count; ++i) {
         // -- at char
-        unsigned short c = Dictionary::getChar(DICT, &pos);
+        const unsigned short c = Dictionary::getChar(DICT, &pos);
         // -- at flag/add
-        unsigned short lowerC = toLowerCase(c);
-        bool terminal = Dictionary::getTerminal(DICT, &pos);
-        int childrenAddress = Dictionary::getAddress(DICT, &pos);
-        const bool needsToContinue = childrenAddress != 0;
+        const unsigned short lowerC = toLowerCase(c);
+        const bool terminal = Dictionary::getTerminal(DICT, &pos);
+        const int childrenAddress = Dictionary::getAddress(DICT, &pos);
+        int matchedProximityCharId = -1;
+        const bool needsToTraverseNextNode = childrenAddress != 0;
         // -- after address or flag
         int freq = 1;
         // If terminal, increment pos
         if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
         // -- after add or freq
+        bool newTraverseAllNodes = traverseAllNodes;
+        int newSnr = snr;
+        int newDiffs = diffs;
+        int newInputIndex = inputIndex;
 
-        // If we are only doing completions, no need to look at the typed characters.
-        if (completion) {
+        // If we are only doing traverseAllNodes, no need to look at the typed characters.
+        if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) {
             mWord[depth] = c;
-            if (terminal) {
+            if (traverseAllNodes && terminal) {
                 onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
                         snr, nextLetters, nextLettersSize, skipPos, freq);
             }
-            if (needsToContinue) {
-                // No need to do proximity suggest any more.
-                getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex,
-                        diffs, skipPos, nextLetters, nextLettersSize);
-            }
-        } else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
-            // Skip the ' or other letter and continue deeper
-            mWord[depth] = c;
-            if (needsToContinue) {
-                getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex,
-                        diffs, skipPos, nextLetters, nextLettersSize);
-            }
         } else {
-            int j = 0;
-            while (currentChars[j] > 0) {
-                // Move to child node
-                if (currentChars[j] == lowerC || currentChars[j] == c) {
-                    mWord[depth] = c;
-                    const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
-                    const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
-                    // If inputIndex is greater than mInputLength, that means there is no
-                    // proximity chars. So, we don't need to check proximity.
-                    if (isSameAsUserTypedLength) {
-                        if (terminal) {
-                            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
-                                    skipPos, freq, addedWeight);
-                        }
-                    }
-                    if (needsToContinue) {
-                        getWordsRec(childrenAddress, depth + 1, maxDepth,
-                                isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1,
-                                diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
-                    }
-                }
-                ++j;
-                // If skipPos is defined, not to search proximity collections.
-                // First char is what user typed.
-                if (skipPos >= 0) break;
+            matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
+            if (matchedProximityCharId < 0) continue;
+            mWord[depth] = c;
+            // If inputIndex is greater than mInputLength, that means there is no
+            // proximity chars. So, we don't need to check proximity.
+            const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
+            const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
+            if (isSameAsUserTypedLength && terminal) {
+                onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
+                        skipPos, freq, addedWeight);
             }
+            if (!needsToTraverseNextNode) continue;
+            // Start traversing all nodes after the index exceeds the user typed length
+            newTraverseAllNodes = isSameAsUserTypedLength;
+            newSnr *= addedWeight;
+            newDiffs += (matchedProximityCharId > 0);
+            ++newInputIndex;
+        }
+        if (needsToTraverseNextNode) {
+            getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes,
+                    newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
         }
     }
 }
@@ -285,4 +263,29 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
         addWord(word, depth + 1, finalFreq);
     }
 }
+
+inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
+        const unsigned short userTypedChar, const int skipPos, const int depth) {
+    // Skip the ' or other letter and continue deeper
+    return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
+}
+
+inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
+        const unsigned short lowerC, const unsigned short c, const int skipPos) {
+    bool matched = false;
+    int j = 0;
+    while (currentChars[j] > 0) {
+        matched = (currentChars[j] == lowerC || currentChars[j] == c);
+        // If skipPos is defined, not to search proximity collections.
+        // First char is what user typed.
+        if (matched) {
+            return j;
+        } else if (skipPos >= 0) {
+            return -1;
+        }
+        ++j;
+    }
+    return -1;
+}
+
 } // namespace latinime
diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h
index 118d7dc29..259276cea 100644
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -53,6 +53,12 @@ private:
     void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
             const int snr, const int skipPos, const int freq, const int addedWeight);
 
+    bool needsToSkipCurrentNode(const unsigned short c,
+            const unsigned short userTypedChar, const int skipPos, const int depth);
+
+    int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
+            const unsigned short c, const int skipPos);
+
     const unsigned char *DICT;
     const int MAX_WORDS;
     const int MAX_WORD_LENGTH;