New dict format, step 2
Move some methods around and make some methods static. Bug: 4392433 Change-Id: I2bbe98aec118a416d21d1e293638e1d324505b9b
This commit is contained in:
parent
3f4385511b
commit
8124e64dcc
5 changed files with 49 additions and 46 deletions
|
@ -111,8 +111,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
|
|||
mMaxBigrams = maxBigrams;
|
||||
|
||||
if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) {
|
||||
int pos = mParentDictionary->isValidWordRec(
|
||||
DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength);
|
||||
int pos = mParentDictionary->isValidWord(prevWord, prevWordLength);
|
||||
if (DEBUG_DICT) {
|
||||
LOGI("Pos -> %d", pos);
|
||||
}
|
||||
|
|
|
@ -53,45 +53,8 @@ bool Dictionary::hasBigram() {
|
|||
return ((mDict[1] & 0xFF) == 1);
|
||||
}
|
||||
|
||||
// TODO: use uint32_t instead of unsigned short
|
||||
// Reports whether `word` (an array of `length` unsigned shorts) is a valid dictionary word.
// NOTE(review): this diff view has lost its +/- markers, so two bodies appear interleaved
// below: an if/else that calls isValidWordRec() directly, and a single return that delegates
// to mUnigramDictionary. The hunk header (-53,45 +53,8) suggests the if/else is the removed
// body and the delegation is the new one - confirm against the actual file.
bool Dictionary::isValidWord(unsigned short *word, int length) {
|
||||
// Lookup starts at DICTIONARY_HEADER_SIZE for the latest dictionary version, else at 0.
if (IS_LATEST_DICT_VERSION) {
|
||||
return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
|
||||
} else {
|
||||
return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
|
||||
}
|
||||
// Forwards the query unchanged to the unigram dictionary's isValidWord().
return mUnigramDictionary->isValidWord(word, length);
|
||||
}
|
||||
|
||||
// Recursively searches the node group at `pos` in mDict for word[offset], descending into
// child groups until the character at index length - 1 has been matched on a terminal node.
// NOTE(review): word[offset] is read before any comparison against `length`, so this looks
// like it expects length >= 1 - confirm against callers.
int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
|
||||
// Returns the address of the bigram data of that word.
|
||||
// Returns NOT_VALID_WORD if not found (the original comment gave this value as -99).
|
||||
|
||||
// Number of sibling entries to scan at this level; the getters below take &pos and
// presumably advance it past the field they read - TODO confirm.
int count = Dictionary::getCount(mDict, &pos);
|
||||
unsigned short currentChar = (unsigned short) word[offset];
|
||||
for (int j = 0; j < count; j++) {
|
||||
unsigned short c = Dictionary::getChar(mDict, &pos);
|
||||
int terminal = Dictionary::getTerminal(mDict, &pos);
|
||||
int childPos = Dictionary::getAddress(mDict, &pos);
|
||||
if (c == currentChar) {
|
||||
// Last character of the word: it is a match only if this node is terminal.
if (offset == length - 1) {
|
||||
if (terminal) {
|
||||
return (pos+1);
|
||||
}
|
||||
} else {
|
||||
// More characters remain: descend only when a child address is present.
if (childPos != 0) {
|
||||
int t = isValidWordRec(childPos, word, offset + 1, length);
|
||||
// Only a positive result is propagated as a hit; anything else keeps scanning siblings.
if (t > 0) {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Return value is discarded; presumably called only to move pos past the frequency
// field of a terminal node - TODO confirm.
if (terminal) {
|
||||
Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos);
|
||||
}
|
||||
// There could be two instances of each letter - upper and lower case. So continue
|
||||
// looking ...
|
||||
}
|
||||
return NOT_VALID_WORD;
|
||||
}
|
||||
} // namespace latinime
|
||||
|
|
|
@ -43,7 +43,6 @@ public:
|
|||
}
|
||||
|
||||
bool isValidWord(unsigned short *word, int length);
|
||||
int isValidWordRec(int pos, unsigned short *word, int offset, int length);
|
||||
void *getDict() { return (void *)mDict; }
|
||||
int getDictSize() { return mDictSize; }
|
||||
int getMmapFd() { return mMmapFd; }
|
||||
|
|
|
@ -265,8 +265,7 @@ void UnigramDictionary::initSuggestions(const int *codes, const int codesSize,
|
|||
mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2;
|
||||
}
|
||||
|
||||
void UnigramDictionary::registerNextLetter(
|
||||
unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||
static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||
if (c < nextLettersSize) {
|
||||
nextLetters[c]++;
|
||||
}
|
||||
|
@ -322,7 +321,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
|||
return false;
|
||||
}
|
||||
|
||||
unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) {
|
||||
static inline unsigned short toBaseLowerCase(unsigned short c) {
|
||||
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
|
||||
c = BASE_CHARS[c];
|
||||
}
|
||||
|
@ -924,4 +923,47 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh
|
|||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: use uint32_t instead of unsigned short
|
||||
// Reports whether `word` (an array of `length` unsigned shorts) is found by isValidWordRec().
// Returns true when the recursive lookup yields anything other than NOT_VALID_WORD.
bool UnigramDictionary::isValidWord(unsigned short *word, int length) {
|
||||
// Lookup starts at DICTIONARY_HEADER_SIZE for the latest dictionary version, else at 0.
if (IS_LATEST_DICT_VERSION) {
|
||||
return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD);
|
||||
} else {
|
||||
return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD);
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively searches the node group at `pos` in DICT_ROOT for word[offset], descending into
// child groups until the character at index length - 1 has been matched on a terminal node.
// NOTE(review): word[offset] is read before any comparison against `length`, so this looks
// like it expects length >= 1 - confirm against callers.
int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) {
|
||||
// Returns the address of the bigram data of that word.
|
||||
// Returns NOT_VALID_WORD if not found (the original comment gave this value as -99).
|
||||
|
||||
// Number of sibling entries to scan at this level; the getters below take &pos and
// presumably advance it past the field they read - TODO confirm.
int count = Dictionary::getCount(DICT_ROOT, &pos);
|
||||
unsigned short currentChar = (unsigned short) word[offset];
|
||||
for (int j = 0; j < count; j++) {
|
||||
unsigned short c = Dictionary::getChar(DICT_ROOT, &pos);
|
||||
int terminal = Dictionary::getTerminal(DICT_ROOT, &pos);
|
||||
int childPos = Dictionary::getAddress(DICT_ROOT, &pos);
|
||||
if (c == currentChar) {
|
||||
// Last character of the word: it is a match only if this node is terminal.
if (offset == length - 1) {
|
||||
if (terminal) {
|
||||
return (pos+1);
|
||||
}
|
||||
} else {
|
||||
// More characters remain: descend only when a child address is present.
if (childPos != 0) {
|
||||
int t = isValidWordRec(childPos, word, offset + 1, length);
|
||||
// Only a positive result is propagated as a hit; anything else keeps scanning siblings.
if (t > 0) {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Return value is discarded; presumably called only to move pos past the frequency
// field of a terminal node - TODO confirm.
if (terminal) {
|
||||
Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos);
|
||||
}
|
||||
// There could be two instances of each letter - upper and lower case. So continue
|
||||
// looking ...
|
||||
}
|
||||
return NOT_VALID_WORD;
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -39,6 +39,7 @@ public:
|
|||
UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler,
|
||||
int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars,
|
||||
const bool isLatestDictVersion);
|
||||
bool isValidWord(unsigned short *word, int length);
|
||||
int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates,
|
||||
const int *ycoordinates, const int *codes, const int codesSize, const int flags,
|
||||
unsigned short *outWords, int *frequencies);
|
||||
|
@ -58,6 +59,7 @@ private:
|
|||
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||
const int maxDepth);
|
||||
int isValidWordRec(int pos, unsigned short *word, int offset, int length);
|
||||
void getVersionNumber();
|
||||
bool checkIfDictVersionIsLatest();
|
||||
int getAddress(int *pos);
|
||||
|
@ -65,7 +67,6 @@ private:
|
|||
int wideStrLen(unsigned short *str);
|
||||
bool sameAsTyped(unsigned short *word, int length);
|
||||
bool addWord(unsigned short *word, int length, int frequency);
|
||||
unsigned short toBaseLowerCase(unsigned short c);
|
||||
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||
const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters,
|
||||
|
@ -79,7 +80,6 @@ private:
|
|||
void getWordsOld(const int initialPos, const int inputLength, const int skipPos,
|
||||
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||
const int nextLettersSize);
|
||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||
const int excessivePos, const int transposedPos, const int freq,
|
||||
const bool sameLength) const;
|
||||
|
|
Loading…
Reference in a new issue