Break down getWordsRec and add comments
Change-Id: I88bad8a4a8177e3540b995b664c47b86d6904027
main
parent
a0e780fa4c
commit
715514d7dd
|
@ -50,4 +50,6 @@
|
||||||
#define SUGGEST_MISSING_CHARACTERS true
|
#define SUGGEST_MISSING_CHARACTERS true
|
||||||
#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5
|
#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5
|
||||||
|
|
||||||
|
#define MAX_WORD_LENGTH_INTERNAL 64
|
||||||
|
|
||||||
#endif // LATINIME_DEFINES_H
|
#endif // LATINIME_DEFINES_H
|
||||||
|
|
|
@ -29,6 +29,9 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
|
||||||
// Checks whether it has the latest dictionary or the old dictionary
|
// Checks whether it has the latest dictionary or the old dictionary
|
||||||
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
|
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
|
||||||
{
|
{
|
||||||
|
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
|
||||||
|
LOGI("Max word length (%d) is greater than %d", maxWordLength, MAX_WORD_LENGTH_INTERNAL);
|
||||||
|
}
|
||||||
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
|
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
|
||||||
mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
|
mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
|
||||||
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
|
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);
|
||||||
|
|
|
@ -102,7 +102,8 @@ int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
|
||||||
return suggestedWordsCount;
|
return suggestedWordsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnigramDictionary::registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
|
void UnigramDictionary::registerNextLetter(
|
||||||
|
unsigned short c, int *nextLetters, int nextLettersSize) {
|
||||||
if (c < nextLettersSize) {
|
if (c < nextLettersSize) {
|
||||||
nextLetters[c]++;
|
nextLetters[c]++;
|
||||||
}
|
}
|
||||||
|
@ -121,8 +122,7 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
// Find the right insertion point
|
// Find the right insertion point
|
||||||
int insertAt = 0;
|
int insertAt = 0;
|
||||||
while (insertAt < MAX_WORDS) {
|
while (insertAt < MAX_WORDS) {
|
||||||
if (frequency > mFrequencies[insertAt]
|
if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
|
||||||
|| (mFrequencies[insertAt] == frequency
|
|
||||||
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
|
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -134,9 +134,9 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
|
||||||
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
|
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
|
||||||
mFrequencies[insertAt] = frequency;
|
mFrequencies[insertAt] = frequency;
|
||||||
memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
|
memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
|
||||||
(char*) mOutputChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short),
|
(char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
|
||||||
(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
|
(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
|
||||||
unsigned short *dest = mOutputChars + (insertAt ) * MAX_WORD_LENGTH;
|
unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
|
||||||
while (length--) {
|
while (length--) {
|
||||||
*dest++ = *word++;
|
*dest++ = *word++;
|
||||||
}
|
}
|
||||||
|
@ -177,8 +177,9 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
|
|
||||||
|
// snr: weight multiplied into each terminal's frequency; scaled by addedWeight per matched letter.
|
||||||
void
|
void
|
||||||
UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
|
UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
|
||||||
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
|
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
|
||||||
|
@ -190,8 +191,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
|
||||||
if (diffs > mMaxEditDistance) {
|
if (diffs > mMaxEditDistance) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// get the count of nodes and increment pos.
|
||||||
int count = Dictionary::getCount(DICT, &pos);
|
int count = Dictionary::getCount(DICT, &pos);
|
||||||
int *currentChars = NULL;
|
int *currentChars = NULL;
|
||||||
|
// If inputIndex is greater than or equal to mInputLength, there are no proximity chars.
|
||||||
if (mInputLength <= inputIndex) {
|
if (mInputLength <= inputIndex) {
|
||||||
completion = true;
|
completion = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -205,8 +208,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
|
||||||
unsigned short lowerC = toLowerCase(c);
|
unsigned short lowerC = toLowerCase(c);
|
||||||
bool terminal = Dictionary::getTerminal(DICT, &pos);
|
bool terminal = Dictionary::getTerminal(DICT, &pos);
|
||||||
int childrenAddress = Dictionary::getAddress(DICT, &pos);
|
int childrenAddress = Dictionary::getAddress(DICT, &pos);
|
||||||
|
const bool needsToContinue = childrenAddress != 0;
|
||||||
// -- after address or flag
|
// -- after address or flag
|
||||||
int freq = 1;
|
int freq = 1;
|
||||||
|
// If terminal, increment pos
|
||||||
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
|
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
|
||||||
// -- after add or freq
|
// -- after add or freq
|
||||||
|
|
||||||
|
@ -214,53 +219,70 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
|
||||||
if (completion) {
|
if (completion) {
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
addWord(mWord, depth + 1, freq * snr);
|
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
||||||
if (depth >= mInputLength && skipPos < 0) {
|
snr, nextLetters, nextLettersSize, skipPos, freq);
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
|
||||||
}
|
}
|
||||||
}
|
if (needsToContinue) {
|
||||||
if (childrenAddress != 0) {
|
// No need to do proximity suggest any more.
|
||||||
getWordsRec(childrenAddress, depth + 1, maxDepth, completion, snr, inputIndex,
|
getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex,
|
||||||
diffs, skipPos, nextLetters, nextLettersSize);
|
diffs, skipPos, nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
} else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
|
} else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
|
||||||
// Skip the ' or other letter and continue deeper
|
// Skip the ' or other letter and continue deeper
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (childrenAddress != 0) {
|
if (needsToContinue) {
|
||||||
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs,
|
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex,
|
||||||
skipPos, nextLetters, nextLettersSize);
|
diffs, skipPos, nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int j = 0;
|
int j = 0;
|
||||||
while (currentChars[j] > 0) {
|
while (currentChars[j] > 0) {
|
||||||
|
// Move to child node
|
||||||
if (currentChars[j] == lowerC || currentChars[j] == c) {
|
if (currentChars[j] == lowerC || currentChars[j] == c) {
|
||||||
int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
|
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (mInputLength == inputIndex + 1) {
|
const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
|
||||||
|
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
|
||||||
|
// isSameAsUserTypedLength: this letter consumes the last input position
|
||||||
|
// (mInputLength == inputIndex + 1), so deeper nodes are visited as completions.
|
||||||
|
if (isSameAsUserTypedLength) {
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
if (//INCLUDE_TYPED_WORD_IF_VALID ||
|
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
|
||||||
!sameAsTyped(mWord, depth + 1)) {
|
skipPos, freq, addedWeight);
|
||||||
int finalFreq = freq * snr * addedWeight;
|
|
||||||
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
|
||||||
addWord(mWord, depth + 1, finalFreq);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (childrenAddress != 0) {
|
if (needsToContinue) {
|
||||||
getWordsRec(childrenAddress, depth + 1,
|
getWordsRec(childrenAddress, depth + 1, maxDepth,
|
||||||
maxDepth, true, snr * addedWeight, inputIndex + 1,
|
isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1,
|
||||||
diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
|
diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
} else if (childrenAddress != 0) {
|
|
||||||
getWordsRec(childrenAddress, depth + 1, maxDepth,
|
|
||||||
false, snr * addedWeight, inputIndex + 1, diffs + (j > 0),
|
|
||||||
skipPos, nextLetters, nextLettersSize);
|
|
||||||
}
|
}
|
||||||
}
|
++j;
|
||||||
j++;
|
// If skipPos is set (>= 0), do not search the proximity candidates;
|
||||||
|
// only the first char, which is what the user typed, is tried.
|
||||||
if (skipPos >= 0) break;
|
if (skipPos >= 0) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
||||||
|
unsigned short *word, const int inputLength, const int depth, const int snr,
|
||||||
|
int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) {
|
||||||
|
addWord(word, depth + 1, freq * snr);
|
||||||
|
if (depth >= inputLength && skipPos < 0) {
|
||||||
|
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
|
unsigned short *word, const int depth, const int snr, const int skipPos, const int freq,
|
||||||
|
const int addedWeight) {
|
||||||
|
if (!sameAsTyped(word, depth + 1)) {
|
||||||
|
int finalFreq = freq * snr * addedWeight;
|
||||||
|
// Proximity collection will promote a word of the same length as
|
||||||
|
// what user typed.
|
||||||
|
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
|
addWord(word, depth + 1, finalFreq);
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -42,10 +42,17 @@ private:
|
||||||
bool sameAsTyped(unsigned short *word, int length);
|
bool sameAsTyped(unsigned short *word, int length);
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
unsigned short toLowerCase(unsigned short c);
|
unsigned short toLowerCase(unsigned short c);
|
||||||
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
|
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
|
||||||
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
|
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
|
|
||||||
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
|
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
||||||
|
const int nextLettersSize, const int skipPos, const int freq);
|
||||||
|
|
||||||
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
||||||
|
const int snr, const int skipPos, const int freq, const int addedWeight);
|
||||||
|
|
||||||
const unsigned char *DICT;
|
const unsigned char *DICT;
|
||||||
const int MAX_WORDS;
|
const int MAX_WORDS;
|
||||||
const int MAX_WORD_LENGTH;
|
const int MAX_WORD_LENGTH;
|
||||||
|
@ -58,7 +65,8 @@ private:
|
||||||
unsigned short *mOutputChars;
|
unsigned short *mOutputChars;
|
||||||
int *mInputCodes;
|
int *mInputCodes;
|
||||||
int mInputLength;
|
int mInputLength;
|
||||||
unsigned short mWord[128];
|
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
|
||||||
|
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
|
||||||
int mMaxEditDistance;
|
int mMaxEditDistance;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue