New dict format, step 1

This renames some variables and removes dependencies on values that will disappear. Bug: 4392433 Change-Id: I79a44462d6bf25248cc2de0d63d7918fc6925d68
2011-06-16 20:55:16 +09:00 · 2011-06-16 20:55:16 +09:00 · 293ece0f34
parent e968ad8ad1
commit 293ece0f34
2 changed files with 23 additions and 16 deletions
--- a/native/src/unigram_dictionary.cpp
+++ b/native/src/unigram_dictionary.cpp
@ -16,8 +16,6 @@
 */
 #include <assert.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <string.h>
 #define LOG_TAG "LatinIME: unigram_dictionary.cpp"
@ -34,10 +32,12 @@ const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
        { 'o', 'e' },
        { 'u', 'e' } };
-UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier,
+// TODO: check the header
 UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultiplier,
        int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
        const bool isLatestDictVersion)
-    : DICT(dict), MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
+    : DICT_ROOT(streamStart),
    MAX_WORD_LENGTH(maxWordLength), MAX_WORDS(maxWords),
    MAX_PROXIMITY_CHARS(maxProximityChars), IS_LATEST_DICT_VERSION(isLatestDictVersion),
    TYPED_LETTER_MULTIPLIER(typedLetterMultiplier), FULL_WORD_MULTIPLIER(fullWordMultiplier),
    ROOT_POS(isLatestDictVersion ? DICTIONARY_HEADER_SIZE : 0),
@ -363,7 +363,7 @@ void UnigramDictionary::getSuggestionCandidates(const int skipPos,
    }
    int rootPosition = ROOT_POS;
    // Get the number of child of root, then increment the position
-    int childCount = Dictionary::getCount(DICT, &rootPosition);
+    int childCount = Dictionary::getCount(DICT_ROOT, &rootPosition);
    int depth = 0;
    mStackChildCount[0] = childCount;
@ -562,7 +562,7 @@ void UnigramDictionary::getWordsOld(const int initialPos, const int inputLength,
        const int excessivePos, const int transposedPos,int *nextLetters,
        const int nextLettersSize) {
    int initialPosition = initialPos;
-    const int count = Dictionary::getCount(DICT, &initialPosition);
+    const int count = Dictionary::getCount(DICT_ROOT, &initialPosition);
    getWordsRec(count, initialPosition, 0,
            min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH),
            mInputLength <= 0, 1, 0, 0, skipPos, excessivePos, transposedPos, nextLetters,
@ -770,8 +770,8 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
    if (excessivePos == depth && inputIndex < mInputLength - 1) ++inputIndex;
-    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, pos, &c,
+    *nextSiblingPosition = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, pos,
-            &childPosition, &terminal, &freq);
+            &c, &childPosition, &terminal, &freq);
    const bool needsToTraverseChildrenNodes = childPosition != 0;
@ -829,7 +829,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
        *newTraverseAllNodes = true;
    }
    // get the count of nodes and increment childAddress.
-    *newCount = Dictionary::getCount(DICT, &childPosition);
+    *newCount = Dictionary::getCount(DICT_ROOT, &childPosition);
    *newChildPosition = childPosition;
    if (DEBUG_DICT) assert(needsToTraverseChildrenNodes);
    return needsToTraverseChildrenNodes;
@ -838,7 +838,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
 inline int UnigramDictionary::getBestWordFreq(const int startInputIndex, const int inputLength,
        unsigned short *word) {
    int pos = ROOT_POS;
-    int count = Dictionary::getCount(DICT, &pos);
+    int count = Dictionary::getCount(DICT_ROOT, &pos);
    int maxFreq = 0;
    int depth = 0;
    unsigned short newWord[MAX_WORD_LENGTH_INTERNAL];
@ -894,8 +894,8 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
    const int inputIndex = startInputIndex + depth;
    const int *currentChars = getInputCharsAt(inputIndex);
    unsigned short c;
-    *siblingPos = Dictionary::setDictionaryValues(DICT, IS_LATEST_DICT_VERSION, firstChildPos, &c,
+    *siblingPos = Dictionary::setDictionaryValues(DICT_ROOT, IS_LATEST_DICT_VERSION, firstChildPos,
-            newChildPosition, newTerminal, newFreq);
+            &c, newChildPosition, newTerminal, newFreq);
    const unsigned int inputC = currentChars[0];
    if (DEBUG_DICT) {
        assert(inputC <= U_SHORT_MAX);
@ -912,7 +912,7 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
            }
        }
        if (hasChild) {
-            *newCount = Dictionary::getCount(DICT, newChildPosition);
+            *newCount = Dictionary::getCount(DICT_ROOT, newChildPosition);
            return true;
        } else {
            return false;
--- a/native/src/unigram_dictionary.h
+++ b/native/src/unigram_dictionary.h
@ -17,9 +17,14 @@
 #ifndef LATINIME_UNIGRAM_DICTIONARY_H
 #define LATINIME_UNIGRAM_DICTIONARY_H
 #include <stdint.h>
 #include "defines.h"
 #include "proximity_info.h"
 #ifndef NULL
 #define NULL 0
 #endif
 namespace latinime {
 class UnigramDictionary {
@ -31,8 +36,9 @@ class UnigramDictionary {
    } ProximityType;
 public:
-    UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
+    UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
-            int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
+            int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
            const bool isLatestDictVersion);
    int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int codesSize, const int flags,
            unsigned short *outWords, int *frequencies);
@ -104,7 +110,8 @@ private:
    inline const int* getInputCharsAt(const int index) const {
        return mInputCodes + (index * MAX_PROXIMITY_CHARS);
    }
-    const unsigned char *DICT;
+
    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;
    const int MAX_WORDS;
    const int MAX_PROXIMITY_CHARS;