From e808e436cbd6f1aeadb5d61f354d03c3c50872a7 Mon Sep 17 00:00:00 2001 From: satok Date: Thu, 2 Dec 2010 14:53:24 +0900 Subject: [PATCH] Refactor: Move utility functions and non-suggestion functions from unigram_dictionary.cpp to dictionary.cpp Change-Id: I6f695e4f5852547d2c00de5ee54a650fef9accbe --- native/src/bigram_dictionary.cpp | 2 + native/src/defines.h | 53 +++++++++ native/src/dictionary.cpp | 58 +++++++++- native/src/dictionary.h | 84 +++++++++++--- native/src/unigram_dictionary.cpp | 185 +++++------------------------- native/src/unigram_dictionary.h | 38 ++---- 6 files changed, 220 insertions(+), 200 deletions(-) create mode 100644 native/src/defines.h diff --git a/native/src/bigram_dictionary.cpp b/native/src/bigram_dictionary.cpp index 4d9a612a5..f0d5f8c9a 100644 --- a/native/src/bigram_dictionary.cpp +++ b/native/src/bigram_dictionary.cpp @@ -15,6 +15,8 @@ ** limitations under the License. */ +#define LOG_TAG "LatinIME: bigram_dictionary.cpp" + #include "bigram_dictionary.h" namespace latinime { diff --git a/native/src/defines.h b/native/src/defines.h new file mode 100644 index 000000000..aaaf3483d --- /dev/null +++ b/native/src/defines.h @@ -0,0 +1,53 @@ +/* +** +** Copyright 2010, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ + +#ifndef LATINIME_DEFINES_H +#define LATINIME_DEFINES_H + +#ifdef FLAG_DBG +#include +#ifndef LOG_TAG +#define LOG_TAG "LatinIME: " +#endif +#define DEBUG_DICT 1 +#else // FLAG_DBG +#define LOGI +#define DEBUG_DICT 0 +#endif // FLAG_DBG + +// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words +#define ADDRESS_MASK 0x3FFFFF + +// The bit that decides if an address follows in the next 22 bits +#define FLAG_ADDRESS_MASK 0x40 +// The bit that decides if this is a terminal node for a word. The node could still have children, +// if the word has other endings. +#define FLAG_TERMINAL_MASK 0x80 + +#define FLAG_BIGRAM_READ 0x80 +#define FLAG_BIGRAM_CHILDEXIST 0x40 +#define FLAG_BIGRAM_CONTINUED 0x80 +#define FLAG_BIGRAM_FREQ 0x7F + +#define DICTIONARY_VERSION_MIN 200 +#define DICTIONARY_HEADER_SIZE 2 +#define NOT_VALID_WORD -99 + +#define SUGGEST_MISSING_CHARACTERS true +#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5 + +#endif // LATINIME_DEFINES_H diff --git a/native/src/dictionary.cpp b/native/src/dictionary.cpp index a21b80a48..cf050fd30 100644 --- a/native/src/dictionary.cpp +++ b/native/src/dictionary.cpp @@ -17,15 +17,22 @@ #include +#define LOG_TAG "LatinIME: dictionary.cpp" + #include "dictionary.h" namespace latinime { Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives) + : DICT((unsigned char*) dict), + // Checks whether it has the latest dictionary or the old dictionary + IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN) { - mUnigramDictionary = new UnigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier, - maxWordLength, maxWords, maxAlternatives, this); + LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF)); + mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier, + maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION, + 
hasBigram(), this); mBigramDictionary = new BigramDictionary(dict, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives, this); } @@ -35,4 +42,51 @@ Dictionary::~Dictionary() delete mUnigramDictionary; delete mBigramDictionary; } + +bool Dictionary::hasBigram() { + return ((DICT[1] & 0xFF) == 1); +} + +// TODO: use uint16_t instead of unsigned short +bool Dictionary::isValidWord(unsigned short *word, int length) +{ + if (IS_LATEST_DICT_VERSION) { + return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); + } else { + return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); + } +} + +int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { + // returns address of bigram data of that word + // return -99 if not found + + int count = Dictionary::getCount(DICT, &pos); + unsigned short currentChar = (unsigned short) word[offset]; + for (int j = 0; j < count; j++) { + unsigned short c = Dictionary::getChar(DICT, &pos); + int terminal = Dictionary::getTerminal(DICT, &pos); + int childPos = Dictionary::getAddress(DICT, &pos); + if (c == currentChar) { + if (offset == length - 1) { + if (terminal) { + return (pos+1); + } + } else { + if (childPos != 0) { + int t = isValidWordRec(childPos, word, offset + 1, length); + if (t > 0) { + return t; + } + } + } + } + if (terminal) { + Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); + } + // There could be two instances of each alphabet - upper and lower case. So continue + // looking ... 
+ } + return NOT_VALID_WORD; +} } // namespace latinime diff --git a/native/src/dictionary.h b/native/src/dictionary.h index 4c1f8837c..61f7cf074 100644 --- a/native/src/dictionary.h +++ b/native/src/dictionary.h @@ -18,24 +18,11 @@ #define LATINIME_DICTIONARY_H #include "bigram_dictionary.h" +#include "defines.h" #include "unigram_dictionary.h" namespace latinime { -// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words -#define ADDRESS_MASK 0x3FFFFF - -// The bit that decides if an address follows in the next 22 bits -#define FLAG_ADDRESS_MASK 0x40 -// The bit that decides if this is a terminal node for a word. The node could still have children, -// if the word has other endings. -#define FLAG_TERMINAL_MASK 0x80 - -#define FLAG_BIGRAM_READ 0x80 -#define FLAG_BIGRAM_CHILDEXIST 0x40 -#define FLAG_BIGRAM_CONTINUED 0x80 -#define FLAG_BIGRAM_FREQ 0x7F - class Dictionary { public: Dictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, @@ -53,21 +40,82 @@ public: return mUnigramDictionary->getBigrams(word, length, codes, codesSize, outWords, frequencies, maxWordLength, maxBigrams, maxAlternatives); } - bool isValidWord(unsigned short *word, int length) { - return mUnigramDictionary->isValidWord(word, length); - } + bool isValidWord(unsigned short *word, int length); + int isValidWordRec(int pos, unsigned short *word, int offset, int length); void setAsset(void *asset) { mAsset = asset; } void *getAsset() { return mAsset; } ~Dictionary(); + // public static utility methods + // static inline methods should be defined in the header file + static unsigned short getChar(const unsigned char *dict, int *pos); + static int getCount(const unsigned char *dict, int *pos); + static bool getTerminal(const unsigned char *dict, int *pos); + static int getAddress(const unsigned char *dict, int *pos); + static int getFreq(const unsigned char *dict, const bool isLatestDictVersion, int *pos); + private: + bool 
hasBigram(); + + const unsigned char *DICT; + const bool IS_LATEST_DICT_VERSION; void *mAsset; BigramDictionary *mBigramDictionary; UnigramDictionary *mUnigramDictionary; }; // ---------------------------------------------------------------------------- +// public static utility methods +// static inline methods should be defined in the header file +inline unsigned short Dictionary::getChar(const unsigned char *dict, int *pos) { + unsigned short ch = (unsigned short) (dict[(*pos)++] & 0xFF); + // If the code is 255, then actual 16 bit code follows (in big endian) + if (ch == 0xFF) { + ch = ((dict[*pos] & 0xFF) << 8) | (dict[*pos + 1] & 0xFF); + (*pos) += 2; + } + return ch; +} + +inline int Dictionary::getCount(const unsigned char *dict, int *pos) { + return dict[(*pos)++] & 0xFF; +} + +inline bool Dictionary::getTerminal(const unsigned char *dict, int *pos) { + return (dict[*pos] & FLAG_TERMINAL_MASK) > 0; +} + +inline int Dictionary::getAddress(const unsigned char *dict, int *pos) { + int address = 0; + if ((dict[*pos] & FLAG_ADDRESS_MASK) == 0) { + *pos += 1; + } else { + address += (dict[*pos] & (ADDRESS_MASK >> 16)) << 16; + address += (dict[*pos + 1] & 0xFF) << 8; + address += (dict[*pos + 2] & 0xFF); + *pos += 3; + } + return address; +} + +inline int Dictionary::getFreq(const unsigned char *dict, + const bool isLatestDictVersion, int *pos) { + int freq = dict[(*pos)++] & 0xFF; + if (isLatestDictVersion) { + // skipping bigram + int bigramExist = (dict[*pos] & FLAG_BIGRAM_READ); + if (bigramExist > 0) { + int nextBigramExist = 1; + while (nextBigramExist > 0) { + (*pos) += 3; + nextBigramExist = (dict[(*pos)++] & FLAG_BIGRAM_CONTINUED); + } + } else { + (*pos)++; + } + } + return freq; +} }; // namespace latinime - #endif // LATINIME_DICTIONARY_H diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 88382a94d..5a48a97a8 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -20,39 +20,26 
@@ #include #include -#ifdef FLAG_DBG -#define LOG_TAG "LatinIME: dictionary.cpp" -#include -#define DEBUG_DICT 1 -#else // FLAG_DBG -#define LOGI -#define DEBUG_DICT 0 -#endif // FLAG_DBG +#define LOG_TAG "LatinIME: unigram_dictionary.cpp" -#include "unigram_dictionary.h" #include "basechars.h" #include "char_utils.h" - -#define DICTIONARY_VERSION_MIN 200 -#define DICTIONARY_HEADER_SIZE 2 -#define NOT_VALID_WORD -99 - -#define SUGGEST_MISSING_CHARACTERS true -#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5 - +#include "dictionary.h" +#include "unigram_dictionary.h" namespace latinime { -UnigramDictionary::UnigramDictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier, - int maxWordLength, int maxWords, int maxAlternatives, Dictionary *parentDictionary) - : MAX_WORD_LENGTH(maxWordLength),MAX_WORDS(maxWords), MAX_ALTERNATIVES(maxAlternatives) +UnigramDictionary::UnigramDictionary(const unsigned char *dict, int typedLetterMultiplier, + int fullWordMultiplier, int maxWordLength, int maxWords, int maxAlternatives, + const bool isLatestDictVersion, const bool hasBigram, Dictionary *parentDictionary) + : DICT(dict), MAX_WORD_LENGTH(maxWordLength),MAX_WORDS(maxWords), + MAX_ALTERNATIVES(maxAlternatives), IS_LATEST_DICT_VERSION(isLatestDictVersion), + HAS_BIGRAM(hasBigram), mParentDictionary(parentDictionary) { LOGI("UnigramDictionary - constructor"); - mDict = (unsigned char*) dict; + LOGI("Has Bigram : %d \n", hasBigram); mTypedLetterMultiplier = typedLetterMultiplier; mFullWordMultiplier = fullWordMultiplier; - mParentDictionary = parentDictionary; - getVersionNumber(); } UnigramDictionary::~UnigramDictionary() @@ -106,7 +93,7 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int *nextLetters, int nextLettersSize) { - if (checkIfDictVersionIsLatest()) { + if (IS_LATEST_DICT_VERSION) { getWordsRec(DICTIONARY_HEADER_SIZE, 0, inputLength * 3, 
false, 1, 0, 0, skipPos, nextLetters, nextLettersSize); } else { @@ -127,72 +114,6 @@ void UnigramDictionary::registerNextLetter(unsigned short c, int *nextLetters, i } } -// TODO: Should be const static variable calculate in the constructor -void -UnigramDictionary::getVersionNumber() -{ - mVersion = (mDict[0] & 0xFF); - mBigram = (mDict[1] & 0xFF); - LOGI("IN NATIVE SUGGEST Version: %d Bigram : %d \n", mVersion, mBigram); -} - -// TODO: Should be const static variable calculate in the constructor -// Checks whether it has the latest dictionary or the old dictionary -bool -UnigramDictionary::checkIfDictVersionIsLatest() -{ - return (mVersion >= DICTIONARY_VERSION_MIN) && (mBigram == 1 || mBigram == 0); -} - -unsigned short -UnigramDictionary::getChar(int *pos) -{ - unsigned short ch = (unsigned short) (mDict[(*pos)++] & 0xFF); - // If the code is 255, then actual 16 bit code follows (in big endian) - if (ch == 0xFF) { - ch = ((mDict[*pos] & 0xFF) << 8) | (mDict[*pos + 1] & 0xFF); - (*pos) += 2; - } - return ch; -} - -int -UnigramDictionary::getAddress(int *pos) -{ - int address = 0; - if ((mDict[*pos] & FLAG_ADDRESS_MASK) == 0) { - *pos += 1; - } else { - address += (mDict[*pos] & (ADDRESS_MASK >> 16)) << 16; - address += (mDict[*pos + 1] & 0xFF) << 8; - address += (mDict[*pos + 2] & 0xFF); - *pos += 3; - } - return address; -} - -int -UnigramDictionary::getFreq(int *pos) -{ - int freq = mDict[(*pos)++] & 0xFF; - - if (checkIfDictVersionIsLatest()) { - // skipping bigram - int bigramExist = (mDict[*pos] & FLAG_BIGRAM_READ); - if (bigramExist > 0) { - int nextBigramExist = 1; - while (nextBigramExist > 0) { - (*pos) += 3; - nextBigramExist = (mDict[(*pos)++] & FLAG_BIGRAM_CONTINUED); - } - } else { - (*pos)++; - } - } - - return freq; -} - int UnigramDictionary::wideStrLen(unsigned short *str) { @@ -325,7 +246,7 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion if (diffs > mMaxEditDistance) { return; } - int count = getCount(&pos); 
+ int count = Dictionary::getCount(DICT, &pos); int *currentChars = NULL; if (mInputLength <= inputIndex) { completion = true; @@ -335,14 +256,14 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion for (int i = 0; i < count; i++) { // -- at char - unsigned short c = getChar(&pos); + unsigned short c = Dictionary::getChar(DICT, &pos); // -- at flag/add unsigned short lowerC = toLowerCase(c); - bool terminal = getTerminal(&pos); - int childrenAddress = getAddress(&pos); + bool terminal = Dictionary::getTerminal(DICT, &pos); + int childrenAddress = Dictionary::getAddress(DICT, &pos); // -- after address or flag int freq = 1; - if (terminal) freq = getFreq(&pos); + if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); // -- after add or freq // If we are only doing completions, no need to look at the typed characters. @@ -403,9 +324,9 @@ UnigramDictionary::getBigramAddress(int *pos, bool advance) { int address = 0; - address += (mDict[*pos] & 0x3F) << 16; - address += (mDict[*pos + 1] & 0xFF) << 8; - address += (mDict[*pos + 2] & 0xFF); + address += (DICT[*pos] & 0x3F) << 16; + address += (DICT[*pos + 1] & 0xFF) << 8; + address += (DICT[*pos + 2] & 0xFF); if (advance) { *pos += 3; @@ -417,7 +338,7 @@ UnigramDictionary::getBigramAddress(int *pos, bool advance) int UnigramDictionary::getBigramFreq(int *pos) { - int freq = mDict[(*pos)++] & FLAG_BIGRAM_FREQ; + int freq = DICT[(*pos)++] & FLAG_BIGRAM_FREQ; return freq; } @@ -434,8 +355,8 @@ UnigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, int mInputLength = codesSize; mMaxBigrams = maxBigrams; - if (mBigram == 1 && checkIfDictVersionIsLatest()) { - int pos = isValidWordRec( + if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { + int pos = mParentDictionary->isValidWordRec( DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); LOGI("Pos -> %d\n", pos); if (pos < 0) { @@ -443,15 +364,15 @@ UnigramDictionary::getBigrams(unsigned short *prevWord, int 
prevWordLength, int } int bigramCount = 0; - int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ); + int bigramExist = (DICT[pos] & FLAG_BIGRAM_READ); if (bigramExist > 0) { int nextBigramExist = 1; while (nextBigramExist > 0 && bigramCount < maxBigrams) { int bigramAddress = getBigramAddress(&pos, true); - int frequency = (FLAG_BIGRAM_FREQ & mDict[pos]); + int frequency = (FLAG_BIGRAM_FREQ & DICT[pos]); // search for all bigrams and store them searchForTerminalNode(bigramAddress, frequency); - nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED); + nextBigramExist = (DICT[pos++] & FLAG_BIGRAM_CONTINUED); bigramCount++; } } @@ -482,7 +403,7 @@ UnigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) word[depth] = (unsigned short) followingChar; } pos = followDownBranchAddress; // pos start at count - int count = mDict[pos] & 0xFF; + int count = DICT[pos] & 0xFF; LOGI("count - %d\n",count); pos++; for (int i = 0; i < count; i++) { @@ -502,7 +423,7 @@ UnigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) } } else { followDownBranchAddress = addr; - followingChar = (char)(0xFF & mDict[pos-1]); + followingChar = (char)(0xFF & DICT[pos-1]); if (firstAddress) { firstAddress = false; haveToSearchAll = false; @@ -513,7 +434,7 @@ UnigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) } else if (getFirstBitOfByte(&pos)) { // terminal if (addressLookingFor == (pos-1)) { // found !! 
depth++; - word[depth] = (0xFF & mDict[pos-1]); + word[depth] = (0xFF & DICT[pos-1]); found = true; break; } @@ -530,7 +451,7 @@ UnigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) } } else { followDownBranchAddress = addr; - followingChar = (char)(0xFF & mDict[pos-1]); + followingChar = (char)(0xFF & DICT[pos-1]); if (firstAddress) { firstAddress = false; haveToSearchAll = true; @@ -543,12 +464,12 @@ UnigramDictionary::searchForTerminalNode(int addressLookingFor, int frequency) } // skipping bigram - int bigramExist = (mDict[pos] & FLAG_BIGRAM_READ); + int bigramExist = (DICT[pos] & FLAG_BIGRAM_READ); if (bigramExist > 0) { int nextBigramExist = 1; while (nextBigramExist > 0) { pos += 3; - nextBigramExist = (mDict[pos++] & FLAG_BIGRAM_CONTINUED); + nextBigramExist = (DICT[pos++] & FLAG_BIGRAM_CONTINUED); } } else { pos++; @@ -584,48 +505,4 @@ UnigramDictionary::checkFirstCharacter(unsigned short *word) return false; } -// TODO: Move to parent dictionary -bool -UnigramDictionary::isValidWord(unsigned short *word, int length) -{ - if (checkIfDictVersionIsLatest()) { - return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); - } else { - return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); - } -} - -int -UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { - // returns address of bigram data of that word - // return -99 if not found - - int count = getCount(&pos); - unsigned short currentChar = (unsigned short) word[offset]; - for (int j = 0; j < count; j++) { - unsigned short c = getChar(&pos); - int terminal = getTerminal(&pos); - int childPos = getAddress(&pos); - if (c == currentChar) { - if (offset == length - 1) { - if (terminal) { - return (pos+1); - } - } else { - if (childPos != 0) { - int t = isValidWordRec(childPos, word, offset + 1, length); - if (t > 0) { - return t; - } - } - } - } - if (terminal) { - getFreq(&pos); - } - // There could be two instances 
of each alphabet - upper and lower case. So continue - // looking ... - } - return NOT_VALID_WORD; -} } // namespace latinime diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index 28e4308b4..4cb1abe9b 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -17,33 +17,20 @@ #ifndef LATINIME_UNIGRAM_DICTIONARY_H #define LATINIME_UNIGRAM_DICTIONARY_H +#include "defines.h" + namespace latinime { -// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words -#define ADDRESS_MASK 0x3FFFFF - -// The bit that decides if an address follows in the next 22 bits -#define FLAG_ADDRESS_MASK 0x40 -// The bit that decides if this is a terminal node for a word. The node could still have children, -// if the word has other endings. -#define FLAG_TERMINAL_MASK 0x80 - -#define FLAG_BIGRAM_READ 0x80 -#define FLAG_BIGRAM_CHILDEXIST 0x40 -#define FLAG_BIGRAM_CONTINUED 0x80 -#define FLAG_BIGRAM_FREQ 0x7F - -class Dictionary; class UnigramDictionary { public: - UnigramDictionary(void *dict, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, - int maxWords, int maxAlternatives, Dictionary *parentDictionary); + UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier, + int maxWordLength, int maxWords, int maxAlternatives, const bool isLatestDictVersion, + const bool hasBigram, Dictionary *parentDictionary); int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies, int *nextLetters, int nextLettersSize); int getBigrams(unsigned short *word, int length, int *codes, int codesSize, unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams, int maxAlternatives); - bool isValidWord(unsigned short *word, int length); ~UnigramDictionary(); private: @@ -57,11 +44,9 @@ private: int getBigramFreq(int *pos); void searchForTerminalNode(int address, int frequency); - bool getFirstBitOfByte(int *pos) { 
return (mDict[*pos] & 0x80) > 0; } - bool getSecondBitOfByte(int *pos) { return (mDict[*pos] & 0x40) > 0; } - bool getTerminal(int *pos) { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; } - int getCount(int *pos) { return mDict[(*pos)++] & 0xFF; } - unsigned short getChar(int *pos); + bool getFirstBitOfByte(int *pos) { return (DICT[*pos] & 0x80) > 0; } + bool getSecondBitOfByte(int *pos) { return (DICT[*pos] & 0x40) > 0; } + bool getTerminal(int *pos) { return (DICT[*pos] & FLAG_TERMINAL_MASK) > 0; } int wideStrLen(unsigned short *str); bool sameAsTyped(unsigned short *word, int length); @@ -72,15 +57,16 @@ private: void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); - int isValidWordRec(int pos, unsigned short *word, int offset, int length); - unsigned char *mDict; - Dictionary *mParentDictionary; + const unsigned char *DICT; const int MAX_WORDS; const int MAX_WORD_LENGTH; const int MAX_ALTERNATIVES; + const bool IS_LATEST_DICT_VERSION; + const bool HAS_BIGRAM; + Dictionary *mParentDictionary; int *mFrequencies; int *mBigramFreq; int mMaxBigrams;