Break down getWordsRec and add comments

Change-Id: I88bad8a4a8177e3540b995b664c47b86d6904027
2010-12-02 20:19:59 +09:00 · 2010-12-02 20:19:59 +09:00 · 715514d7dd
parent a0e780fa4c
commit 715514d7dd
4 changed files with 70 additions and 35 deletions
--- a/native/src/defines.h
+++ b/native/src/defines.h
@ -50,4 +50,6 @@
 #define SUGGEST_MISSING_CHARACTERS true
 #define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5

+#define MAX_WORD_LENGTH_INTERNAL 64
+
 #endif // LATINIME_DEFINES_H
--- a/native/src/dictionary.cpp
+++ b/native/src/dictionary.cpp
@ -29,6 +29,9 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
    // Checks whether it has the latest dictionary or the old dictionary
    IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
 {
+    if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
+        LOGI("Max word length (%d) is greater than %d", maxWordLength, MAX_WORD_LENGTH_INTERNAL);
+    }
    LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
    mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
            maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -102,7 +102,8 @@ int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
    return suggestedWordsCount;
 }

-void UnigramDictionary::registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
+void UnigramDictionary::registerNextLetter(
+        unsigned short c, int *nextLetters, int nextLettersSize) {
    if (c < nextLettersSize) {
        nextLetters[c]++;
    }
@ -121,8 +122,7 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
    // Find the right insertion point
    int insertAt = 0;
    while (insertAt < MAX_WORDS) {
-        if (frequency > mFrequencies[insertAt]
-                 || (mFrequencies[insertAt] == frequency
+        if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
                && length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
            break;
        }
@ -134,9 +134,9 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
               (MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
        mFrequencies[insertAt] = frequency;
        memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
-               (char*) mOutputChars + (insertAt    ) * MAX_WORD_LENGTH * sizeof(short),
+               (char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
               (MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
-        unsigned short *dest = mOutputChars + (insertAt    ) * MAX_WORD_LENGTH;
+        unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
        while (length--) {
            *dest++ = *word++;
        }
@ -177,8 +177,9 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length)
    return true;
 }

-static char QUOTE = '\'';
+static const char QUOTE = '\'';

+// snr : frequency?
 void
 UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
        int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
@ -190,8 +191,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
    if (diffs > mMaxEditDistance) {
        return;
    }
+    // get the count of nodes and increment pos.
    int count = Dictionary::getCount(DICT, &pos);
    int *currentChars = NULL;
+    // If inputIndex is greater than or equal to mInputLength, there are no proximity chars
+    // left to match, so the rest of the traversal is completion.
    if (mInputLength <= inputIndex) {
        completion = true;
    } else {
@ -205,8 +208,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
        unsigned short lowerC = toLowerCase(c);
        bool terminal = Dictionary::getTerminal(DICT, &pos);
        int childrenAddress = Dictionary::getAddress(DICT, &pos);
+        const bool needsToContinue = childrenAddress != 0;
        // -- after address or flag
        int freq = 1;
+        // If terminal, increment pos
        if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
        // -- after add or freq

@ -214,53 +219,70 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
        if (completion) {
            mWord[depth] = c;
            if (terminal) {
-                addWord(mWord, depth + 1, freq * snr);
-                if (depth >= mInputLength && skipPos < 0) {
-                    registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
+                onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
+                        snr, nextLetters, nextLettersSize, skipPos, freq);
            }
-            }
-            if (childrenAddress != 0) {
-                getWordsRec(childrenAddress, depth + 1, maxDepth, completion, snr, inputIndex,
+            if (needsToContinue) {
+                // No need to do proximity suggest any more.
+                getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex,
                        diffs, skipPos, nextLetters, nextLettersSize);
            }
        } else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
            // Skip the ' or other letter and continue deeper
            mWord[depth] = c;
-            if (childrenAddress != 0) {
-                getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs,
-                        skipPos, nextLetters, nextLettersSize);
+            if (needsToContinue) {
+                getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex,
+                        diffs, skipPos, nextLetters, nextLettersSize);
            }
        } else {
            int j = 0;
            while (currentChars[j] > 0) {
+                // Move to child node
                if (currentChars[j] == lowerC || currentChars[j] == c) {
-                    int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
                    mWord[depth] = c;
-                    if (mInputLength == inputIndex + 1) {
+                    const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
+                    const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
+                    // If this is the last typed char (mInputLength == inputIndex + 1), no
+                    // input remains for deeper nodes, so the recursion below runs in
+                    // completion mode and does not need to check proximity.
+                    if (isSameAsUserTypedLength) {
                        if (terminal) {
-                            if (//INCLUDE_TYPED_WORD_IF_VALID ||
-                                !sameAsTyped(mWord, depth + 1)) {
-                                int finalFreq = freq * snr * addedWeight;
-                                if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
-                                addWord(mWord, depth + 1, finalFreq);
+                            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
+                                    skipPos, freq, addedWeight);
                        }
                    }
-                        if (childrenAddress != 0) {
-                            getWordsRec(childrenAddress, depth + 1,
-                                    maxDepth, true, snr * addedWeight, inputIndex + 1,
+                    if (needsToContinue) {
+                        getWordsRec(childrenAddress, depth + 1, maxDepth,
+                                isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1,
                                diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
                    }
-                    } else if (childrenAddress != 0) {
-                        getWordsRec(childrenAddress, depth + 1, maxDepth,
-                                false, snr * addedWeight, inputIndex + 1, diffs + (j > 0),
-                                skipPos, nextLetters, nextLettersSize);
                }
-                }
-                j++;
+                ++j;
+                // If skipPos is defined, do not search the proximity chars: only the
+                // first char, which is what the user typed, is checked.
                if (skipPos >= 0) break;
            }
        }
    }
 }

+// Handles a terminal node reached in completion mode: adds the word built so far with
+// frequency freq * snr and, when the word is longer than inputLength and no character is
+// being skipped (skipPos < 0), registers the letter at index inputLength.
+inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
+        unsigned short *word, const int inputLength, const int depth, const int snr,
+        int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) {
+    addWord(word, depth + 1, freq * snr);
+    if (depth >= inputLength && skipPos < 0) {
+        // Use the parameters rather than the mWord/mInputLength members so that the index
+        // agrees with the length check above.
+        registerNextLetter(word[inputLength], nextLetters, nextLettersSize);
+    }
+}
+
+// Handles a terminal node matched at the last typed character: adds the word with
+// frequency freq * snr * addedWeight, unless sameAsTyped() reports a match.
+inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
+        unsigned short *word, const int depth, const int snr, const int skipPos, const int freq,
+        const int addedWeight) {
+    const int wordLength = depth + 1;
+    if (sameAsTyped(word, wordLength)) {
+        return;
+    }
+    const int baseFreq = freq * snr * addedWeight;
+    // When no character is being skipped (skipPos < 0), a word of the same length as what
+    // the user typed is promoted by FULL_WORD_MULTIPLIER.
+    const int finalFreq = skipPos < 0 ? baseFreq * FULL_WORD_MULTIPLIER : baseFreq;
+    addWord(word, wordLength, finalFreq);
+}
 } // namespace latinime
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -42,10 +42,17 @@ private:
    bool sameAsTyped(unsigned short *word, int length);
    bool addWord(unsigned short *word, int length, int frequency);
    unsigned short toLowerCase(unsigned short c);
-    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
+    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
            int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);

+    // Called for a terminal node in completion mode; adds the word and may call
+    // registerNextLetter() for the letter at index inputLength.
+    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
+            const int inputLength, const int depth, const int snr, int *nextLetters,
+            const int nextLettersSize, const int skipPos, const int freq);
+
+    // Called for a terminal node matched at the last typed character; adds the word unless
+    // sameAsTyped() reports that it matches the typed input.
+    void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
+            const int snr, const int skipPos, const int freq, const int addedWeight);
+
    const unsigned char *DICT;
    const int MAX_WORDS;
    const int MAX_WORD_LENGTH;
@ -58,7 +65,8 @@ private:
    unsigned short *mOutputChars;
    int *mInputCodes;
    int mInputLength;
-    unsigned short mWord[128];
+    // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
+    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
    int mMaxEditDistance;
 };