Merge "New dict format, step 4"

2011-06-17 05:30:26 -07:00 · 2011-06-17 05:30:26 -07:00 · 23eb0fa0b5
parent 8f5a3842e6 ca5ef2890e
commit 23eb0fa0b5
2 changed files with 49 additions and 38 deletions
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -272,6 +272,7 @@ static inline void registerNextLetter(unsigned short c, int *nextLetters, int ne
 }

 // TODO: We need to optimize addWord by using STL or something
+// TODO: This needs to take a const unsigned short* and not tinker with its contents
 bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
    word[length] = 0;
    if (DEBUG_DICT && DEBUG_SHOW_FOUND_WORD) {
@ -321,6 +322,16 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
    return false;
 }

+// Placeholder hook for suggesting the alternate spellings of a terminal node.
+// It currently does nothing with any of its arguments.
+inline void UnigramDictionary::addWordAlternatesSpellings(const uint8_t* const root, int pos,
+        int depth, int finalFreq) {
+    // TODO: actually add alternates when the format supports it.
+    static_cast<void>(root);
+    static_cast<void>(pos);
+    static_cast<void>(depth);
+    static_cast<void>(finalFreq);
+}
+
+// Tells whether a node's flags mark it as having alternate spellings.
+// For now the flags are ignored and the answer is always false.
+static inline bool hasAlternateSpellings(uint8_t flags) {
+    // TODO: when the format supports it, return the actual value.
+    static_cast<void>(flags);
+    return false;
+}
+
 static inline unsigned short toBaseLowerCase(unsigned short c) {
    if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
        c = BASE_CHARS[c];
@ -333,7 +344,7 @@ static inline unsigned short toBaseLowerCase(unsigned short c) {
    return c;
 }

-bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
+bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const {
    if (length != mInputLength) {
        return false;
    }
@ -656,28 +667,6 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
    return finalFreq;
 }

-inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
-        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
-        int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
-        const int transposedPos, const int freq) {
-    const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos, excessivePos,
-            transposedPos, freq, false);
-    if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
-    if (depth >= mInputLength && skipPos < 0) {
-        registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
-    }
-}
-
-inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
-        unsigned short *word, const int inputIndex, const int depth, const int matchWeight,
-        const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
-    if (sameAsTyped(word, depth + 1)) return;
-    const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
-            excessivePos, transposedPos, freq, true);
-    // Proximity collection will promote a word of the same length as what user typed.
-    if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
-}
-
 inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
        const int inputIndex, const int skipPos, const int depth) {
    const unsigned short userTypedChar = getInputCharsAt(inputIndex)[0];
@ -708,7 +697,6 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
    return false;
 }

-
 // In the following function, c is the current character of the dictionary word
 // currently examined.
 // currentChars is an array containing the keys close to the character the
@ -751,6 +739,30 @@ inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId
    return UNRELATED_CHAR;
 }

+// Handles a terminal node reached while walking the dictionary: scores the candidate word,
+// offers it as a suggestion and, where applicable, records the letter following the input.
+//
+// word, depth: candidate buffer and the index of its last character; the candidate is
+//         handed to sameAsTyped() and addWord() with a length of depth + 1.
+// root, flags, pos: dictionary buffer, node flags and node position. They are only used
+//         for alternate spellings, which hasAlternateSpellings() never reports for now.
+// inputIndex, matchWeight, skipPos, excessivePos, transposedPos, freq: forwarded to
+//         calculateFinalFreq() to compute the candidate's score.
+// sameLength: callers pass true when the candidate is as long as the typed input and
+//         false when the traversal has gone past the end of the input.
+// nextLetters, nextLettersSize: table handed to registerNextLetter().
+inline void UnigramDictionary::onTerminal(unsigned short int* word, const int depth,
+        const uint8_t* const root, const uint8_t flags, int pos,
+        const int inputIndex, const int matchWeight, const int skipPos,
+        const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
+        int* nextLetters, const int nextLettersSize) {
+    // The comparison with the typed word is only attempted for same-length candidates.
+    const bool isSameAsTyped = sameLength && sameAsTyped(word, depth + 1);
+    const bool hasAlternates = hasAlternateSpellings(flags);
+    // A word identical to the input is not suggested; without alternates nothing is left.
+    if (isSameAsTyped && !hasAlternates) return;
+
+    if (depth >= MIN_SUGGEST_DEPTH) {
+        const int finalFreq = calculateFinalFreq(inputIndex, depth, matchWeight, skipPos,
+                excessivePos, transposedPos, freq, sameLength);
+        if (!isSameAsTyped) {
+            addWord(word, depth + 1, finalFreq);
+        }
+        if (hasAlternates) {
+            // Pass the caller-supplied root, and the depth (not the flags) as the third
+            // argument, to match the declared parameters of addWordAlternatesSpellings().
+            addWordAlternatesSpellings(root, pos, depth, finalFreq);
+        }
+    }
+
+    // Before the two terminal handlers were merged, next letters were registered only by
+    // onTerminalWhenUserTypedLengthIsGreaterThanInputLength, i.e. the sameLength == false
+    // case. Keep that behavior.
+    if (!sameLength && depth >= mInputLength && skipPos < 0) {
+        registerNextLetter(word[mInputLength], nextLetters, nextLettersSize);
+    }
+}
+
 inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
        const int maxDepth, const bool traverseAllNodes, int matchWeight, int inputIndex,
        const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
@ -770,6 +782,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
    int freq;
    bool isSameAsUserTypedLength = false;

+    const uint8_t flags = 0; // No flags for now
+
    if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;

    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
@ -782,9 +796,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
    if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
        mWord[depth] = c;
        if (traverseAllNodes && terminal) {
-            onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
-                    matchWeight, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos,
-                    freq);
+            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
+                       excessivePos, transposedPos, freq, false, nextLetters, nextLettersSize);
        }
        if (!needsToTraverseChildrenNodes) return false;
        *newTraverseAllNodes = traverseAllNodes;
@ -811,8 +824,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
        bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
                || (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
        if (isSameAsUserTypedLength && terminal) {
-            onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, matchWeight,
-                    skipPos, excessivePos, transposedPos, freq);
+            onTerminal(mWord, depth, DICT_ROOT, flags, pos, inputIndex, matchWeight, skipPos,
+                    excessivePos, transposedPos, freq, true, nextLetters, nextLettersSize);
        }
        if (!needsToTraverseChildrenNodes) return false;
        // Start traversing all nodes after the index exceeds the user typed length
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -64,9 +64,9 @@ private:
    bool checkIfDictVersionIsLatest();
    int getAddress(int *pos);
    int getFreq(int *pos);
-    int wideStrLen(unsigned short *str);
-    bool sameAsTyped(unsigned short *word, int length);
+    bool sameAsTyped(const unsigned short *word, int length) const;
    bool addWord(unsigned short *word, int length, int frequency);
+    void addWordAlternatesSpellings(const uint8_t* const root, int pos, int depth, int finalFreq);
    void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
            const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
            const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
@ -83,13 +83,11 @@ private:
    int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
            const int excessivePos, const int transposedPos, const int freq,
            const bool sameLength) const;
-    void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
-            const int inputIndex, const int depth, const int snr, int *nextLetters,
-            const int nextLettersSize, const int skipPos, const int excessivePos,
-            const int transposedPos, const int freq);
-    void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
-            const int inputIndex, const int depth, const int snr, const int skipPos,
-            const int excessivePos, const int transposedPos, const int freq);
+    void onTerminal(unsigned short int* word, const int depth,
+            const uint8_t* const root, const uint8_t flags, int pos,
+            const int inputIndex, const int matchWeight, const int skipPos,
+            const int excessivePos, const int transposedPos, const int freq, const bool sameLength,
+            int *nextLetters, const int nextLettersSize);
    bool needsToSkipCurrentNode(const unsigned short c,
            const int inputIndex, const int skipPos, const int depth);
    ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,