Break down getWordsRec

Change-Id: I4fef02c227fb858334dbe2eabf2762d5b6e1d919
2010-12-06 17:38:58 +09:00 · 2010-12-06 17:38:58 +09:00 · 48e432ceb8
parent 4e24668a75
commit 48e432ceb8
3 changed files with 108 additions and 63 deletions
--- a/native/src/dictionary.h
+++ b/native/src/dictionary.h
@ -54,6 +54,10 @@ public:
    static int getAddress(const unsigned char *dict, int *pos);
    static int getFreq(const unsigned char *dict, const bool isLatestDictVersion, int *pos);
    static int wideStrLen(unsigned short *str);
    // Reads the node stored at |pos| in |dict| and reports its fields through the
    // output pointers: the node's character in |c|, its children address in
    // |childrenPosition|, its terminal flag in |terminal| and its frequency in |freq|
    // (|freq| is set to 1 when the node is not terminal).
    // Returns the position just past this node, i.e. the next sibling's position.
    static int setDictionaryValues(const unsigned char *dict, const bool isLatestDictVersion,
            const int pos, unsigned short *c, int *childrenPosition,
            bool *terminal, int *freq);
 private:
    bool hasBigram();
@ -127,5 +131,20 @@ inline int Dictionary::wideStrLen(unsigned short *str) {
    return end - str;
 }
 // Decodes the single node that starts at |pos| in |dict|.
 // Outputs: |c| receives the node's character, |terminal| its terminal flag,
 // |childrenPosition| its children address, and |freq| its frequency (1 when the
 // node is not terminal). The return value is the position right after the node,
 // which is where the next sibling begins.
 inline int Dictionary::setDictionaryValues(const unsigned char *dict,
        const bool isLatestDictVersion, const int pos, unsigned short *c, int *childrenPosition,
        bool *terminal, int *freq) {
    // Every getter below receives the cursor by pointer so that it can move it
    // past the field it consumes; the calls must therefore stay in this order.
    int cursor = pos;
    // Cursor is at the character.
    *c = Dictionary::getChar(dict, &cursor);
    // Cursor is at the flag/address field.
    *terminal = Dictionary::getTerminal(dict, &cursor);
    *childrenPosition = Dictionary::getAddress(dict, &cursor);
    // Cursor is past the address or flag; a frequency is read only for terminals.
    if (*terminal) {
        *freq = Dictionary::getFreq(dict, isLatestDictVersion, &cursor);
    } else {
        *freq = 1;
    }
    // The cursor now points at the next sibling.
    return cursor;
 }
 }; // namespace latinime
 #endif // LATINIME_DICTIONARY_H
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -15,9 +15,9 @@
 ** limitations under the License.
 */
 #include <assert.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <string.h>
 #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
@ -185,66 +185,24 @@ void UnigramDictionary::getWords(const int initialPos, const int inputLength, co
 void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
        const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
        const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
-    int position = pos;
+    int siblingPos = pos;
    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
    for (int i = 0; i < childrenCount; ++i) {
-        // -- at char
+        int newCount;
-        const unsigned short c = Dictionary::getChar(DICT, &position);
+        int newChildPosition;
-        // -- at flag/add
+        int newDepth;
-        const unsigned short lowerC = toLowerCase(c);
+        bool newTraverseAllNodes;
-        const bool terminal = Dictionary::getTerminal(DICT, &position);
+        int newSnr;
-        int childrenPosition = Dictionary::getAddress(DICT, &position);
+        int newInputIndex;
-        int matchedProximityCharId = -1;
+        int newDiffs;
-        const bool needsToTraverseNextNode = childrenPosition != 0;
+        int newSiblingPos;
-        // -- after address or flag
+        const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, depth, maxDepth,
-        int freq = 1;
+                traverseAllNodes, snr, inputIndex, diffs, skipPos, nextLetters, nextLettersSize,
-        // If terminal, increment pos
+                &newCount, &newChildPosition, &newDepth, &newTraverseAllNodes, &newSnr,
-        if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
+                &newInputIndex, &newDiffs, &newSiblingPos);
-        // -- after add or freq
+        siblingPos = newSiblingPos;
        bool newTraverseAllNodes = traverseAllNodes;
        int newSnr = snr;
        int newDiffs = diffs;
        int newInputIndex = inputIndex;
        const int newDepth = depth + 1;
-        // If we are only doing traverseAllNodes, no need to look at the typed characters.
+        if (needsToTraverseChildrenNodes) {
-        if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
+            getWordsRec(newCount, newChildPosition, newDepth, maxDepth, newTraverseAllNodes,
            mWord[depth] = c;
            if (traverseAllNodes && terminal) {
                onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
                        snr, nextLetters, nextLettersSize, skipPos, freq);
            }
        } else {
            int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
            matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
            if (matchedProximityCharId < 0) continue;
            mWord[depth] = c;
            // If inputIndex is greater than mInputLength, that means there is no
            // proximity chars. So, we don't need to check proximity.
            const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
            const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
            if (isSameAsUserTypedLength && terminal) {
                onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
                        skipPos, freq, addedWeight);
            }
            if (!needsToTraverseNextNode) continue;
            // Start traversing all nodes after the index exceeds the user typed length
            newTraverseAllNodes = isSameAsUserTypedLength;
            newSnr *= addedWeight;
            newDiffs += (matchedProximityCharId > 0);
            ++newInputIndex;
        }
        // Optimization: Prune out words that are too long compared to how much was typed.
        if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
            continue;
        }
        if (mInputLength <= newInputIndex) {
            newTraverseAllNodes = true;
        }
        if (needsToTraverseNextNode) {
            // get the count of nodes and increment childAddress.
            const int count = Dictionary::getCount(DICT, &childrenPosition);
            getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
                    newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
        }
    }
@ -279,7 +237,8 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
 }
 inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
-        const unsigned short lowerC, const unsigned short c, const int skipPos) {
+        const unsigned short c, const int skipPos) {
    const unsigned short lowerC = toLowerCase(c);
    int j = 0;
    while (currentChars[j] > 0) {
        const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
@ -295,4 +254,68 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
    return -1;
 }
 // Processes the single dictionary node located at |pos| for the current traversal
 // state and tells the caller whether (and how) to descend into its children.
 //
 // Inputs: |pos| is the node's position; |depth|, |maxDepth|, |traverseAllNodes|,
 // |snr|, |inputIndex|, |diffs| and |skipPos| describe the current traversal state;
 // |nextLetters| / |nextLettersSize| are forwarded to the terminal callback.
 //
 // Outputs: |nextSiblingPosition| is always written (before any return) with the
 // position returned by Dictionary::setDictionaryValues. The remaining |new*| outputs
 // (count, child position, depth, traverse-all flag, snr, input index, diffs) are
 // all valid only when the function returns true; on a false return some or all of
 // them may be left unwritten, so the caller must not read them.
 //
 // Side effects: writes the node's character to mWord[depth] when the node is
 // accepted, and invokes the onTerminal* callbacks for terminal nodes.
 //
 // Returns true when the node has children that should be traversed next.
 inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
        const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
        const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
        int *newCount, int *newChildPosition, int *newDepth, bool *newTraverseAllNodes,
        int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition) {
    unsigned short c;
    int childPosition;
    bool terminal;
    int freq;
    // Decode the node; the returned value is where the next sibling starts.
    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
            &childPosition, &terminal, &freq);
    // A children address of 0 is treated as "this node has no children".
    const bool needsToTraverseChildrenNodes = childPosition != 0;
    // If we are only doing traverseAllNodes, no need to look at the typed characters.
    if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
        mWord[depth] = c;
        if (traverseAllNodes && terminal) {
            onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
                    snr, nextLetters, nextLettersSize, skipPos, freq);
        }
        // Leaf node: nothing to descend into, the new* outputs stay unwritten.
        if (!needsToTraverseChildrenNodes) return false;
        // The typed input is not consumed on this path: only the depth advances.
        *newTraverseAllNodes = traverseAllNodes;
        *newSnr = snr;
        *newDiffs = diffs;
        *newInputIndex = inputIndex;
        *newDepth = depth + 1;
    } else {
        // Candidate characters for the current input position.
        int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
        int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos);
        // No candidate matches this node's character: prune this node.
        if (matchedProximityCharId < 0) return false;
        mWord[depth] = c;
        // If inputIndex is greater than mInputLength, that means there is no
        // proximity chars. So, we don't need to check proximity.
        // Id 0 gets the typed-letter multiplier; any other match gets weight 1.
        // NOTE(review): presumably id 0 is the exact key the user typed — confirm.
        const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
        const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
        if (isSameAsUserTypedLength && terminal) {
            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
                    skipPos, freq, addedWeight);
        }
        // Leaf node: nothing to descend into, the new* outputs stay unwritten.
        if (!needsToTraverseChildrenNodes) return false;
        // Start traversing all nodes after the index exceeds the user typed length
        *newTraverseAllNodes = isSameAsUserTypedLength;
        *newSnr = snr * addedWeight;
        // A match with a non-zero id counts as one more difference.
        *newDiffs = diffs + (matchedProximityCharId > 0);
        *newInputIndex = inputIndex + 1;
        *newDepth = depth + 1;
    }
    // Optimization: Prune out words that are too long compared to how much was typed.
    if (*newDepth > maxDepth || *newDiffs > mMaxEditDistance) {
        return false;
    }
    // If inputIndex is greater than mInputLength, that means there are no proximity chars.
    if (mInputLength <= *newInputIndex) {
        *newTraverseAllNodes = true;
    }
    // get the count of nodes and increment childAddress.
    *newCount = Dictionary::getCount(DICT, &childPosition);
    *newChildPosition = childPosition;
    // Both branches above return early for leaf nodes, so this must hold here.
    if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);
    return needsToTraverseChildrenNodes;
 }
 } // namespace latinime
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -54,9 +54,12 @@ private:
            const int snr, const int skipPos, const int freq, const int addedWeight);
    bool needsToSkipCurrentNode(const unsigned short c,
            const int inputIndex, const int skipPos, const int depth);
-    int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
+    int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos);
-            const unsigned short c, const int skipPos);
+    bool processCurrentNode(const int pos, const int depth,
-
+            const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
            const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize,
            int *newCount, int *newChildPosition, int *newDepth, bool *newTraverseAllNodes,
            int *newSnr, int*newInputIndex, int *newDiffs, int *nextSiblingPosition);
    const unsigned char *DICT;
    const int MAX_WORDS;
    const int MAX_WORD_LENGTH;