Merge "Trim the flow of getWordRec"

2010-12-05 21:58:48 -08:00 · 2010-12-05 21:58:48 -08:00 · 4e24668a75
parent e18bd3e323 683192684c
commit 4e24668a75
3 changed files with 46 additions and 39 deletions
--- a/native/src/defines.h
+++ b/native/src/defines.h
@@ -52,4 +52,6 @@
 #define MAX_WORD_LENGTH_INTERNAL 64
 #define MAX_DEPTH_MULTIPLIER 3
 #endif // LATINIME_DEFINES_H
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@@ -87,12 +87,11 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
 int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
        int *nextLetters, int nextLettersSize) {
    int initialPos = 0;
    if (IS_LATEST_DICT_VERSION) {
-        getWordsRec(DICTIONARY_HEADER_SIZE, 0, inputLength * 3, false, 1, 0, 0, skipPos,
+        initialPos = DICTIONARY_HEADER_SIZE;
                nextLetters, nextLettersSize);
    } else {
        getWordsRec(0, 0, inputLength * 3, false, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
    }
    getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize);
    // Get the word count
    int suggestedWordsCount = 0;
@@ -174,50 +173,49 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
 static const char QUOTE = '\'';
-// snr : frequency?
+void UnigramDictionary::getWords(const int initialPos, const int inputLength, const int skipPos,
-void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes,
+        int *nextLetters, const int nextLettersSize) {
-        int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) {
+    int initialPosition = initialPos;
-    // Optimization: Prune out words that are too long compared to how much was typed.
+    const int count = Dictionary::getCount(DICT, &initialPosition);
-    if (depth > maxDepth || diffs > mMaxEditDistance) {
+    getWordsRec(count, initialPosition, 0, inputLength * MAX_DEPTH_MULTIPLIER,
-        return;
+            mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
    }
    // get the count of nodes and increment pos.
    int count = Dictionary::getCount(DICT, &pos);
    int *currentChars = NULL;
    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
    if (mInputLength <= inputIndex) {
        traverseAllNodes = true;
    } else {
        currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
 }
-    for (int i = 0; i < count; ++i) {
+// snr : frequency?
 void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
        const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
        const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
    int position = pos;
    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
    for (int i = 0; i < childrenCount; ++i) {
        // -- at char
-        const unsigned short c = Dictionary::getChar(DICT, &pos);
+        const unsigned short c = Dictionary::getChar(DICT, &position);
        // -- at flag/add
        const unsigned short lowerC = toLowerCase(c);
-        const bool terminal = Dictionary::getTerminal(DICT, &pos);
+        const bool terminal = Dictionary::getTerminal(DICT, &position);
-        const int childrenAddress = Dictionary::getAddress(DICT, &pos);
+        int childrenPosition = Dictionary::getAddress(DICT, &position);
        int matchedProximityCharId = -1;
-        const bool needsToTraverseNextNode = childrenAddress != 0;
+        const bool needsToTraverseNextNode = childrenPosition != 0;
        // -- after address or flag
        int freq = 1;
        // If terminal, increment pos
-        if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
+        if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
        // -- after add or freq
        bool newTraverseAllNodes = traverseAllNodes;
        int newSnr = snr;
        int newDiffs = diffs;
        int newInputIndex = inputIndex;
        const int newDepth = depth + 1;
        // If we are only doing traverseAllNodes, no need to look at the typed characters.
-        if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) {
+        if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
            mWord[depth] = c;
            if (traverseAllNodes && terminal) {
                onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
                        snr, nextLetters, nextLettersSize, skipPos, freq);
            }
        } else {
            int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
            matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
            if (matchedProximityCharId < 0) continue;
            mWord[depth] = c;
@@ -236,8 +234,17 @@ void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool trave
            newDiffs += (matchedProximityCharId > 0);
            ++newInputIndex;
        }
        // Optimization: Prune out words that are too long compared to how much was typed.
        if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
            continue;
        }
        if (mInputLength <= newInputIndex) {
            newTraverseAllNodes = true;
        }
        if (needsToTraverseNextNode) {
-            getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes,
+            // get the count of nodes and increment childAddress.
            const int count = Dictionary::getCount(DICT, &childrenPosition);
            getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
                    newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
        }
    }
@@ -265,17 +272,17 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
 }
 inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
-        const unsigned short userTypedChar, const int skipPos, const int depth) {
+        const int inputIndex, const int skipPos, const int depth) {
    const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_ALTERNATIVES))[0];
    // Skip the ' or other letter and continue deeper
    return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
 }
 inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
        const unsigned short lowerC, const unsigned short c, const int skipPos) {
    bool matched = false;
    int j = 0;
    while (currentChars[j] > 0) {
-        matched = (currentChars[j] == lowerC || currentChars[j] == c);
+        const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
        // If skipPos is defined, not to search proximity collections.
        // First char is what user typed.
        if (matched) {
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@@ -38,24 +38,22 @@ private:
    int getAddress(int *pos);
    int getFreq(int *pos);
    int wideStrLen(unsigned short *str);
    bool sameAsTyped(unsigned short *word, int length);
    bool addWord(unsigned short *word, int length, int frequency);
    unsigned short toLowerCase(unsigned short c);
-    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
+    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
-            int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
+            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
            const int skipPos, int *nextLetters, const int nextLettersSize);
    void getWords(const int initialPos, const int inputLength, const int skipPos, int *nextLetters,
            const int nextLettersSize);
    void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
            const int mInputLength, const int depth, const int snr, int *nextLetters,
            const int nextLettersSize, const int skipPos, const int freq);
    void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
            const int snr, const int skipPos, const int freq, const int addedWeight);
    bool needsToSkipCurrentNode(const unsigned short c,
-            const unsigned short userTypedChar, const int skipPos, const int depth);
+            const int inputIndex, const int skipPos, const int depth);
    int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
            const unsigned short c, const int skipPos);