Trim the flow of getWordsRec

Change-Id: Ic0cfa64ee1e55682ca73681c585db6a5cb510900
main
satok 2010-12-03 19:38:08 +09:00
parent cfb24365f9
commit 683192684c
3 changed files with 46 additions and 39 deletions

View File

@ -52,4 +52,6 @@
#define MAX_WORD_LENGTH_INTERNAL 64 #define MAX_WORD_LENGTH_INTERNAL 64
#define MAX_DEPTH_MULTIPLIER 3
#endif // LATINIME_DEFINES_H #endif // LATINIME_DEFINES_H

View File

@ -87,12 +87,11 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos, int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
int *nextLetters, int nextLettersSize) { int *nextLetters, int nextLettersSize) {
int initialPos = 0;
if (IS_LATEST_DICT_VERSION) { if (IS_LATEST_DICT_VERSION) {
getWordsRec(DICTIONARY_HEADER_SIZE, 0, inputLength * 3, false, 1, 0, 0, skipPos, initialPos = DICTIONARY_HEADER_SIZE;
nextLetters, nextLettersSize);
} else {
getWordsRec(0, 0, inputLength * 3, false, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
} }
getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize);
// Get the word count // Get the word count
int suggestedWordsCount = 0; int suggestedWordsCount = 0;
@ -174,50 +173,49 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
static const char QUOTE = '\''; static const char QUOTE = '\'';
// snr : frequency? void UnigramDictionary::getWords(const int initialPos, const int inputLength, const int skipPos,
void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes, int *nextLetters, const int nextLettersSize) {
int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) { int initialPosition = initialPos;
// Optimization: Prune out words that are too long compared to how much was typed. const int count = Dictionary::getCount(DICT, &initialPosition);
if (depth > maxDepth || diffs > mMaxEditDistance) { getWordsRec(count, initialPosition, 0, inputLength * MAX_DEPTH_MULTIPLIER,
return; mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
} }
// get the count of nodes and increment pos.
int count = Dictionary::getCount(DICT, &pos);
int *currentChars = NULL;
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
if (mInputLength <= inputIndex) {
traverseAllNodes = true;
} else {
currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
}
for (int i = 0; i < count; ++i) { // snr : frequency?
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
int position = pos;
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
for (int i = 0; i < childrenCount; ++i) {
// -- at char // -- at char
const unsigned short c = Dictionary::getChar(DICT, &pos); const unsigned short c = Dictionary::getChar(DICT, &position);
// -- at flag/add // -- at flag/add
const unsigned short lowerC = toLowerCase(c); const unsigned short lowerC = toLowerCase(c);
const bool terminal = Dictionary::getTerminal(DICT, &pos); const bool terminal = Dictionary::getTerminal(DICT, &position);
const int childrenAddress = Dictionary::getAddress(DICT, &pos); int childrenPosition = Dictionary::getAddress(DICT, &position);
int matchedProximityCharId = -1; int matchedProximityCharId = -1;
const bool needsToTraverseNextNode = childrenAddress != 0; const bool needsToTraverseNextNode = childrenPosition != 0;
// -- after address or flag // -- after address or flag
int freq = 1; int freq = 1;
// If terminal, increment pos // If terminal, increment pos
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos); if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
// -- after add or freq // -- after add or freq
bool newTraverseAllNodes = traverseAllNodes; bool newTraverseAllNodes = traverseAllNodes;
int newSnr = snr; int newSnr = snr;
int newDiffs = diffs; int newDiffs = diffs;
int newInputIndex = inputIndex; int newInputIndex = inputIndex;
const int newDepth = depth + 1;
// If we are only doing traverseAllNodes, no need to look at the typed characters. // If we are only doing traverseAllNodes, no need to look at the typed characters.
if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) { if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c; mWord[depth] = c;
if (traverseAllNodes && terminal) { if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth, onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
snr, nextLetters, nextLettersSize, skipPos, freq); snr, nextLetters, nextLettersSize, skipPos, freq);
} }
} else { } else {
int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos); matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
if (matchedProximityCharId < 0) continue; if (matchedProximityCharId < 0) continue;
mWord[depth] = c; mWord[depth] = c;
@ -236,8 +234,17 @@ void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool trave
newDiffs += (matchedProximityCharId > 0); newDiffs += (matchedProximityCharId > 0);
++newInputIndex; ++newInputIndex;
} }
// Optimization: Prune out words that are too long compared to how much was typed.
if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
continue;
}
if (mInputLength <= newInputIndex) {
newTraverseAllNodes = true;
}
if (needsToTraverseNextNode) { if (needsToTraverseNextNode) {
getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes, // get the count of nodes and increment childAddress.
const int count = Dictionary::getCount(DICT, &childrenPosition);
getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize); newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
} }
} }
@ -265,17 +272,17 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
} }
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
const unsigned short userTypedChar, const int skipPos, const int depth) { const int inputIndex, const int skipPos, const int depth) {
const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_ALTERNATIVES))[0];
// Skip the ' or other letter and continue deeper // Skip the ' or other letter and continue deeper
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth; return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
} }
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars, inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
const unsigned short lowerC, const unsigned short c, const int skipPos) { const unsigned short lowerC, const unsigned short c, const int skipPos) {
bool matched = false;
int j = 0; int j = 0;
while (currentChars[j] > 0) { while (currentChars[j] > 0) {
matched = (currentChars[j] == lowerC || currentChars[j] == c); const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
// If skipPos is defined, not to search proximity collections. // If skipPos is defined, not to search proximity collections.
// First char is what user typed. // First char is what user typed.
if (matched) { if (matched) {

View File

@ -38,24 +38,22 @@ private:
int getAddress(int *pos); int getAddress(int *pos);
int getFreq(int *pos); int getFreq(int *pos);
int wideStrLen(unsigned short *str); int wideStrLen(unsigned short *str);
bool sameAsTyped(unsigned short *word, int length); bool sameAsTyped(unsigned short *word, int length);
bool addWord(unsigned short *word, int length, int frequency); bool addWord(unsigned short *word, int length, int frequency);
unsigned short toLowerCase(unsigned short c); unsigned short toLowerCase(unsigned short c);
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr, void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize); const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
const int skipPos, int *nextLetters, const int nextLettersSize);
void getWords(const int initialPos, const int inputLength, const int skipPos, int *nextLetters,
const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int mInputLength, const int depth, const int snr, int *nextLetters, const int mInputLength, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int freq); const int nextLettersSize, const int skipPos, const int freq);
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth, void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
const int snr, const int skipPos, const int freq, const int addedWeight); const int snr, const int skipPos, const int freq, const int addedWeight);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const unsigned short userTypedChar, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
int getMatchedProximityId(const int *currentChars, const unsigned short lowerC, int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
const unsigned short c, const int skipPos); const unsigned short c, const int skipPos);