Break down getWordsRec and add comments

Change-Id: I88bad8a4a8177e3540b995b664c47b86d6904027
This commit is contained in:
satok 2010-12-02 20:19:59 +09:00
parent a0e780fa4c
commit 715514d7dd
4 changed files with 70 additions and 35 deletions

View file

@ -50,4 +50,6 @@
#define SUGGEST_MISSING_CHARACTERS true
#define SUGGEST_MISSING_CHARACTERS_THRESHOLD 5
#define MAX_WORD_LENGTH_INTERNAL 64
#endif // LATINIME_DEFINES_H

View file

@ -29,6 +29,9 @@ Dictionary::Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultip
// Checks whether it has the latest dictionary or the old dictionary
IS_LATEST_DICT_VERSION((((unsigned char*) dict)[0] & 0xFF) >= DICTIONARY_VERSION_MIN)
{
if (MAX_WORD_LENGTH_INTERNAL < maxWordLength) {
LOGI("Max word length (%d) is greater than %d", maxWordLength, MAX_WORD_LENGTH_INTERNAL);
}
LOGI("IN NATIVE SUGGEST Version: %d \n", (DICT[0] & 0xFF));
mUnigramDictionary = new UnigramDictionary(DICT, typedLetterMultiplier, fullWordMultiplier,
maxWordLength, maxWords, maxAlternatives, IS_LATEST_DICT_VERSION);

View file

@ -102,7 +102,8 @@ int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
return suggestedWordsCount;
}
void UnigramDictionary::registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) {
void UnigramDictionary::registerNextLetter(
unsigned short c, int *nextLetters, int nextLettersSize) {
if (c < nextLettersSize) {
nextLetters[c]++;
}
@ -121,9 +122,8 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
// Find the right insertion point
int insertAt = 0;
while (insertAt < MAX_WORDS) {
if (frequency > mFrequencies[insertAt]
|| (mFrequencies[insertAt] == frequency
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
if (frequency > mFrequencies[insertAt] || (mFrequencies[insertAt] == frequency
&& length < Dictionary::wideStrLen(mOutputChars + insertAt * MAX_WORD_LENGTH))) {
break;
}
insertAt++;
@ -134,9 +134,9 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
(MAX_WORDS - insertAt - 1) * sizeof(mFrequencies[0]));
mFrequencies[insertAt] = frequency;
memmove((char*) mOutputChars + (insertAt + 1) * MAX_WORD_LENGTH * sizeof(short),
(char*) mOutputChars + (insertAt ) * MAX_WORD_LENGTH * sizeof(short),
(char*) mOutputChars + insertAt * MAX_WORD_LENGTH * sizeof(short),
(MAX_WORDS - insertAt - 1) * sizeof(short) * MAX_WORD_LENGTH);
unsigned short *dest = mOutputChars + (insertAt ) * MAX_WORD_LENGTH;
unsigned short *dest = mOutputChars + insertAt * MAX_WORD_LENGTH;
while (length--) {
*dest++ = *word++;
}
@ -177,8 +177,9 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length)
return true;
}
static char QUOTE = '\'';
static const char QUOTE = '\'';
// snr : frequency?
void
UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
@ -190,8 +191,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
if (diffs > mMaxEditDistance) {
return;
}
// get the count of nodes and increment pos.
int count = Dictionary::getCount(DICT, &pos);
int *currentChars = NULL;
// If inputIndex is greater than or equal to mInputLength, that means there are no proximity chars.
if (mInputLength <= inputIndex) {
completion = true;
} else {
@ -205,8 +208,10 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
unsigned short lowerC = toLowerCase(c);
bool terminal = Dictionary::getTerminal(DICT, &pos);
int childrenAddress = Dictionary::getAddress(DICT, &pos);
const bool needsToContinue = childrenAddress != 0;
// -- after address or flag
int freq = 1;
// If terminal, increment pos
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
// -- after add or freq
@ -214,53 +219,70 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
if (completion) {
mWord[depth] = c;
if (terminal) {
addWord(mWord, depth + 1, freq * snr);
if (depth >= mInputLength && skipPos < 0) {
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
}
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
snr, nextLetters, nextLettersSize, skipPos, freq);
}
if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, maxDepth, completion, snr, inputIndex,
if (needsToContinue) {
// No need to do proximity suggest any more.
getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex,
diffs, skipPos, nextLetters, nextLettersSize);
}
} else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
// Skip the ' or other letter and continue deeper
mWord[depth] = c;
if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex, diffs,
skipPos, nextLetters, nextLettersSize);
if (needsToContinue) {
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex,
diffs, skipPos, nextLetters, nextLettersSize);
}
} else {
int j = 0;
while (currentChars[j] > 0) {
// Move to child node
if (currentChars[j] == lowerC || currentChars[j] == c) {
int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
mWord[depth] = c;
if (mInputLength == inputIndex + 1) {
const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
// If this is the last typed character (mInputLength == inputIndex + 1), there are
// no more proximity chars to check, so deeper nodes are visited in completion mode.
if (isSameAsUserTypedLength) {
if (terminal) {
if (//INCLUDE_TYPED_WORD_IF_VALID ||
!sameAsTyped(mWord, depth + 1)) {
int finalFreq = freq * snr * addedWeight;
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
addWord(mWord, depth + 1, finalFreq);
}
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
skipPos, freq, addedWeight);
}
if (childrenAddress != 0) {
getWordsRec(childrenAddress, depth + 1,
maxDepth, true, snr * addedWeight, inputIndex + 1,
diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
}
} else if (childrenAddress != 0) {
}
if (needsToContinue) {
getWordsRec(childrenAddress, depth + 1, maxDepth,
false, snr * addedWeight, inputIndex + 1, diffs + (j > 0),
skipPos, nextLetters, nextLettersSize);
isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1,
diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
}
}
j++;
++j;
// If skipPos is defined, not to search proximity collections.
// First char is what user typed.
if (skipPos >= 0) break;
}
}
}
}
// Handles a terminal node reached in completion mode: adds the candidate word and,
// when the guard below holds, registers the letter that follows the typed prefix.
//
// word            - buffer holding the candidate; its first depth + 1 characters are
//                   handed to addWord()
// inputLength     - length of the typed input (the visible caller passes mInputLength)
// depth           - index of the candidate's last character within word
// snr             - weight multiplied with freq to form the score given to addWord()
// nextLetters     - counter array forwarded to registerNextLetter()
// nextLettersSize - size of nextLetters, forwarded to registerNextLetter()
// skipPos         - skip position; the next letter is only registered when it is
//                   negative (presumably meaning "no skip" - confirm against callers)
// freq            - frequency read for this terminal node
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
        unsigned short *word, const int inputLength, const int depth, const int snr,
        int *nextLetters, const int nextLettersSize, const int skipPos, const int freq) {
    addWord(word, depth + 1, freq * snr);
    // Index through the same buffer/length pair that the guard tests, so the
    // depth check always covers the element being read.
    if (depth >= inputLength && skipPos < 0) {
        registerNextLetter(word[inputLength], nextLetters, nextLettersSize);
    }
}
// Handles a terminal node whose word has the same length as the typed input.
// Candidates for which sameAsTyped() returns true are not added. Otherwise the
// word is added with a score of freq * snr * addedWeight, additionally scaled by
// FULL_WORD_MULTIPLIER when skipPos is negative.
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
        unsigned short *word, const int depth, const int snr, const int skipPos, const int freq,
        const int addedWeight) {
    const int wordLength = depth + 1;
    if (sameAsTyped(word, wordLength)) {
        return;
    }
    const int baseScore = freq * snr * addedWeight;
    // A word of the same length as what the user typed gets promoted, but only
    // when skipPos is negative.
    const int score = (skipPos < 0) ? baseScore * FULL_WORD_MULTIPLIER : baseScore;
    addWord(word, wordLength, score);
}
} // namespace latinime

View file

@ -42,10 +42,17 @@ private:
bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
// Called for a terminal node in completion mode: adds the word via addWord() with
// score freq * snr and, when depth >= inputLength and skipPos < 0, registers the
// next letter via registerNextLetter().
// The parameter is named inputLength (not mInputLength) so that it does not shadow
// the member of the same name and matches the definition.
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
        const int inputLength, const int depth, const int snr, int *nextLetters,
        const int nextLettersSize, const int skipPos, const int freq);
// Called for a terminal node when the candidate is as long as the typed input.
// Unless sameAsTyped(word, depth + 1) is true, adds the word via addWord() with
// score freq * snr * addedWeight, further multiplied by FULL_WORD_MULTIPLIER
// when skipPos < 0.
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
const int snr, const int skipPos, const int freq, const int addedWeight);
const unsigned char *DICT;
const int MAX_WORDS;
const int MAX_WORD_LENGTH;
@ -58,7 +65,8 @@ private:
unsigned short *mOutputChars;
int *mInputCodes;
int mInputLength;
unsigned short mWord[128];
// MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH
unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
int mMaxEditDistance;
};