Break down getWordsRec

Change-Id: I8556efb1dd053eff9a9681971cbe1014abf0333f
main
satok 2010-12-03 16:39:16 +09:00
parent 715514d7dd
commit 28bd03b9f5
2 changed files with 72 additions and 63 deletions

View File

@ -109,9 +109,7 @@ void UnigramDictionary::registerNextLetter(
} }
} }
bool bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) {
UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
{
word[length] = 0; word[length] = 0;
if (DEBUG_DICT) { if (DEBUG_DICT) {
char s[length + 1]; char s[length + 1];
@ -147,8 +145,7 @@ UnigramDictionary::addWord(unsigned short *word, int length, int frequency)
return false; return false;
} }
unsigned short unsigned short UnigramDictionary::toLowerCase(unsigned short c) {
UnigramDictionary::toLowerCase(unsigned short c) {
if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) {
c = BASE_CHARS[c]; c = BASE_CHARS[c];
} }
@ -160,9 +157,7 @@ UnigramDictionary::toLowerCase(unsigned short c) {
return c; return c;
} }
bool bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
UnigramDictionary::sameAsTyped(unsigned short *word, int length)
{
if (length != mInputLength) { if (length != mInputLength) {
return false; return false;
} }
@ -180,15 +175,10 @@ UnigramDictionary::sameAsTyped(unsigned short *word, int length)
static const char QUOTE = '\''; static const char QUOTE = '\'';
// snr : frequency? // snr : frequency?
void void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes,
UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) {
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize)
{
// Optimization: Prune out words that are too long compared to how much was typed. // Optimization: Prune out words that are too long compared to how much was typed.
if (depth > maxDepth) { if (depth > maxDepth || diffs > mMaxEditDistance) {
return;
}
if (diffs > mMaxEditDistance) {
return; return;
} }
// get the count of nodes and increment pos. // get the count of nodes and increment pos.
@ -196,71 +186,59 @@ UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool completion
int *currentChars = NULL; int *currentChars = NULL;
// If inputIndex is greater than mInputLength, that means there are no proximity chars. // If inputIndex is greater than mInputLength, that means there are no proximity chars.
if (mInputLength <= inputIndex) { if (mInputLength <= inputIndex) {
completion = true; traverseAllNodes = true;
} else { } else {
currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES); currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
} }
for (int i = 0; i < count; i++) { for (int i = 0; i < count; ++i) {
// -- at char // -- at char
unsigned short c = Dictionary::getChar(DICT, &pos); const unsigned short c = Dictionary::getChar(DICT, &pos);
// -- at flag/add // -- at flag/add
unsigned short lowerC = toLowerCase(c); const unsigned short lowerC = toLowerCase(c);
bool terminal = Dictionary::getTerminal(DICT, &pos); const bool terminal = Dictionary::getTerminal(DICT, &pos);
int childrenAddress = Dictionary::getAddress(DICT, &pos); const int childrenAddress = Dictionary::getAddress(DICT, &pos);
const bool needsToContinue = childrenAddress != 0; int matchedProximityCharId = -1;
const bool needsToTraverseNextNode = childrenAddress != 0;
// -- after address or flag // -- after address or flag
int freq = 1; int freq = 1;
// If terminal, increment pos // If terminal, increment pos
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
// -- after add or freq // -- after add or freq
bool newTraverseAllNodes = traverseAllNodes;
int newSnr = snr;
int newDiffs = diffs;
int newInputIndex = inputIndex;
// If we are only doing completions, no need to look at the typed characters. // If we are only doing traverseAllNodes, no need to look at the typed characters.
if (completion) { if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) {
mWord[depth] = c; mWord[depth] = c;
if (terminal) { if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth, onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
snr, nextLetters, nextLettersSize, skipPos, freq); snr, nextLetters, nextLettersSize, skipPos, freq);
} }
if (needsToContinue) {
// No need to do proximity suggest any more.
getWordsRec(childrenAddress, depth + 1, maxDepth, true, snr, inputIndex,
diffs, skipPos, nextLetters, nextLettersSize);
}
} else if ((c == QUOTE && currentChars[0] != QUOTE) || skipPos == depth) {
// Skip the ' or other letter and continue deeper
mWord[depth] = c;
if (needsToContinue) {
getWordsRec(childrenAddress, depth + 1, maxDepth, false, snr, inputIndex,
diffs, skipPos, nextLetters, nextLettersSize);
}
} else { } else {
int j = 0; matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
while (currentChars[j] > 0) { if (matchedProximityCharId < 0) continue;
// Move to child node
if (currentChars[j] == lowerC || currentChars[j] == c) {
mWord[depth] = c; mWord[depth] = c;
const int addedWeight = j == 0 ? TYPED_LETTER_MULTIPLIER : 1;
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
// If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no
// proximity chars. So, we don't need to check proximity. // proximity chars. So, we don't need to check proximity.
if (isSameAsUserTypedLength) { const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
if (terminal) { const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
if (isSameAsUserTypedLength && terminal) {
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr, onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
skipPos, freq, addedWeight); skipPos, freq, addedWeight);
} }
if (!needsToTraverseNextNode) continue;
// Start traversing all nodes after the index exceeds the user typed length
newTraverseAllNodes = isSameAsUserTypedLength;
newSnr *= addedWeight;
newDiffs += (matchedProximityCharId > 0);
++newInputIndex;
} }
if (needsToContinue) { if (needsToTraverseNextNode) {
getWordsRec(childrenAddress, depth + 1, maxDepth, getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes,
isSameAsUserTypedLength, snr * addedWeight, inputIndex + 1, newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
diffs + (j > 0), skipPos, nextLetters, nextLettersSize);
}
}
++j;
// If skipPos is defined, not to search proximity collections.
// First char is what user typed.
if (skipPos >= 0) break;
}
} }
} }
} }
@ -285,4 +263,29 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
addWord(word, depth + 1, finalFreq); addWord(word, depth + 1, finalFreq);
} }
} }
// Tells the caller whether the current dictionary node should be skipped.
//   c             - character stored in the current dictionary node
//   userTypedChar - character to compare against for the apostrophe check
//   skipPos       - depth at which a node is to be skipped
//   depth         - depth of the current node
// Returns true if c is an apostrophe while userTypedChar is not, or if the
// current depth is exactly skipPos.
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
        const unsigned short userTypedChar, const int skipPos, const int depth) {
    // An apostrophe in the dictionary that has no counterpart in the input.
    if (c == QUOTE && userTypedChar != QUOTE) {
        return true;
    }
    // Otherwise only the node sitting at the requested skip position is skipped.
    return skipPos == depth;
}
// Looks for the dictionary character among the candidate codes in currentChars.
//   currentChars - candidate codes; scanning ends at the first entry that is
//                  not positive
//   lowerC       - lower-cased form of the dictionary character
//   c            - the dictionary character as stored
//   skipPos      - when non-negative, only the first candidate is examined
// Returns the index of the first candidate equal to lowerC or c, or -1 when
// no candidate matches.
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
        const unsigned short lowerC, const unsigned short c, const int skipPos) {
    for (int index = 0; currentChars[index] > 0; ++index) {
        const int candidate = currentChars[index];
        if (candidate == lowerC || candidate == c) {
            return index;
        }
        // With a skip position defined, the remaining candidates are not
        // searched: give up as soon as the first one fails to match.
        if (skipPos >= 0) {
            return -1;
        }
    }
    return -1;
}
} // namespace latinime } // namespace latinime

View File

@ -53,6 +53,12 @@ private:
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth, void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
const int snr, const int skipPos, const int freq, const int addedWeight); const int snr, const int skipPos, const int freq, const int addedWeight);
// Returns true when the node character c is an apostrophe ('\'') while
// userTypedChar is not, or when depth is equal to skipPos.
bool needsToSkipCurrentNode(const unsigned short c,
const unsigned short userTypedChar, const int skipPos, const int depth);
// Returns the index of the first entry of currentChars that equals lowerC or
// c, or -1 if there is none. Scanning stops at the first entry that is not
// positive; when skipPos >= 0 only the first entry is examined.
int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
const unsigned short c, const int skipPos);
const unsigned char *DICT; const unsigned char *DICT;
const int MAX_WORDS; const int MAX_WORDS;
const int MAX_WORD_LENGTH; const int MAX_WORD_LENGTH;