Merge "Trim the flow of getWordRec"
commit
4e24668a75
|
@ -52,4 +52,6 @@
|
||||||
|
|
||||||
#define MAX_WORD_LENGTH_INTERNAL 64
|
#define MAX_WORD_LENGTH_INTERNAL 64
|
||||||
|
|
||||||
|
#define MAX_DEPTH_MULTIPLIER 3
|
||||||
|
|
||||||
#endif // LATINIME_DEFINES_H
|
#endif // LATINIME_DEFINES_H
|
||||||
|
|
|
@ -87,12 +87,11 @@ void UnigramDictionary::initSuggestions(int *codes, int codesSize, unsigned shor
|
||||||
|
|
||||||
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
|
int UnigramDictionary::getSuggestionCandidates(int inputLength, int skipPos,
|
||||||
int *nextLetters, int nextLettersSize) {
|
int *nextLetters, int nextLettersSize) {
|
||||||
|
int initialPos = 0;
|
||||||
if (IS_LATEST_DICT_VERSION) {
|
if (IS_LATEST_DICT_VERSION) {
|
||||||
getWordsRec(DICTIONARY_HEADER_SIZE, 0, inputLength * 3, false, 1, 0, 0, skipPos,
|
initialPos = DICTIONARY_HEADER_SIZE;
|
||||||
nextLetters, nextLettersSize);
|
|
||||||
} else {
|
|
||||||
getWordsRec(0, 0, inputLength * 3, false, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
|
|
||||||
}
|
}
|
||||||
|
getWords(initialPos, inputLength, skipPos, nextLetters, nextLettersSize);
|
||||||
|
|
||||||
// Get the word count
|
// Get the word count
|
||||||
int suggestedWordsCount = 0;
|
int suggestedWordsCount = 0;
|
||||||
|
@ -174,50 +173,49 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
|
||||||
|
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
|
|
||||||
// snr : frequency?
|
void UnigramDictionary::getWords(const int initialPos, const int inputLength, const int skipPos,
|
||||||
void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool traverseAllNodes,
|
int *nextLetters, const int nextLettersSize) {
|
||||||
int snr, int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize) {
|
int initialPosition = initialPos;
|
||||||
// Optimization: Prune out words that are too long compared to how much was typed.
|
const int count = Dictionary::getCount(DICT, &initialPosition);
|
||||||
if (depth > maxDepth || diffs > mMaxEditDistance) {
|
getWordsRec(count, initialPosition, 0, inputLength * MAX_DEPTH_MULTIPLIER,
|
||||||
return;
|
mInputLength <= 0, 1, 0, 0, skipPos, nextLetters, nextLettersSize);
|
||||||
}
|
|
||||||
// get the count of nodes and increment pos.
|
|
||||||
int count = Dictionary::getCount(DICT, &pos);
|
|
||||||
int *currentChars = NULL;
|
|
||||||
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
|
|
||||||
if (mInputLength <= inputIndex) {
|
|
||||||
traverseAllNodes = true;
|
|
||||||
} else {
|
|
||||||
currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i) {
|
// snr : frequency?
|
||||||
|
void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, const int depth,
|
||||||
|
const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex,
|
||||||
|
const int diffs, const int skipPos, int *nextLetters, const int nextLettersSize) {
|
||||||
|
int position = pos;
|
||||||
|
// If inputIndex is greater than mInputLength, that means there are no proximity chars.
|
||||||
|
for (int i = 0; i < childrenCount; ++i) {
|
||||||
// -- at char
|
// -- at char
|
||||||
const unsigned short c = Dictionary::getChar(DICT, &pos);
|
const unsigned short c = Dictionary::getChar(DICT, &position);
|
||||||
// -- at flag/add
|
// -- at flag/add
|
||||||
const unsigned short lowerC = toLowerCase(c);
|
const unsigned short lowerC = toLowerCase(c);
|
||||||
const bool terminal = Dictionary::getTerminal(DICT, &pos);
|
const bool terminal = Dictionary::getTerminal(DICT, &position);
|
||||||
const int childrenAddress = Dictionary::getAddress(DICT, &pos);
|
int childrenPosition = Dictionary::getAddress(DICT, &position);
|
||||||
int matchedProximityCharId = -1;
|
int matchedProximityCharId = -1;
|
||||||
const bool needsToTraverseNextNode = childrenAddress != 0;
|
const bool needsToTraverseNextNode = childrenPosition != 0;
|
||||||
// -- after address or flag
|
// -- after address or flag
|
||||||
int freq = 1;
|
int freq = 1;
|
||||||
// If terminal, increment pos
|
// If terminal, increment pos
|
||||||
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &pos);
|
if (terminal) freq = Dictionary::getFreq(DICT, IS_LATEST_DICT_VERSION, &position);
|
||||||
// -- after add or freq
|
// -- after add or freq
|
||||||
bool newTraverseAllNodes = traverseAllNodes;
|
bool newTraverseAllNodes = traverseAllNodes;
|
||||||
int newSnr = snr;
|
int newSnr = snr;
|
||||||
int newDiffs = diffs;
|
int newDiffs = diffs;
|
||||||
int newInputIndex = inputIndex;
|
int newInputIndex = inputIndex;
|
||||||
|
const int newDepth = depth + 1;
|
||||||
|
|
||||||
// If we are only doing traverseAllNodes, no need to look at the typed characters.
|
// If we are only doing traverseAllNodes, no need to look at the typed characters.
|
||||||
if (traverseAllNodes || needsToSkipCurrentNode(c, currentChars[0], skipPos, depth)) {
|
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (traverseAllNodes && terminal) {
|
if (traverseAllNodes && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
||||||
snr, nextLetters, nextLettersSize, skipPos, freq);
|
snr, nextLetters, nextLettersSize, skipPos, freq);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
int *currentChars = mInputCodes + (inputIndex * MAX_ALTERNATIVES);
|
||||||
matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
|
matchedProximityCharId = getMatchedProximityId(currentChars, lowerC, c, skipPos);
|
||||||
if (matchedProximityCharId < 0) continue;
|
if (matchedProximityCharId < 0) continue;
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
|
@ -236,8 +234,17 @@ void UnigramDictionary::getWordsRec(int pos, int depth, int maxDepth, bool trave
|
||||||
newDiffs += (matchedProximityCharId > 0);
|
newDiffs += (matchedProximityCharId > 0);
|
||||||
++newInputIndex;
|
++newInputIndex;
|
||||||
}
|
}
|
||||||
|
// Optimization: Prune out words that are too long compared to how much was typed.
|
||||||
|
if (newDepth > maxDepth || newDiffs > mMaxEditDistance) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (mInputLength <= newInputIndex) {
|
||||||
|
newTraverseAllNodes = true;
|
||||||
|
}
|
||||||
if (needsToTraverseNextNode) {
|
if (needsToTraverseNextNode) {
|
||||||
getWordsRec(childrenAddress, depth + 1, maxDepth, newTraverseAllNodes,
|
// get the count of nodes and increment childAddress.
|
||||||
|
const int count = Dictionary::getCount(DICT, &childrenPosition);
|
||||||
|
getWordsRec(count, childrenPosition, newDepth, maxDepth, newTraverseAllNodes,
|
||||||
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
|
newSnr, newInputIndex, newDiffs, skipPos, nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -265,17 +272,17 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
const unsigned short userTypedChar, const int skipPos, const int depth) {
|
const int inputIndex, const int skipPos, const int depth) {
|
||||||
|
const unsigned short userTypedChar = (mInputCodes + (inputIndex * MAX_ALTERNATIVES))[0];
|
||||||
// Skip the ' or other letter and continue deeper
|
// Skip the ' or other letter and continue deeper
|
||||||
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
|
return (c == QUOTE && userTypedChar != QUOTE) || skipPos == depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
||||||
const unsigned short lowerC, const unsigned short c, const int skipPos) {
|
const unsigned short lowerC, const unsigned short c, const int skipPos) {
|
||||||
bool matched = false;
|
|
||||||
int j = 0;
|
int j = 0;
|
||||||
while (currentChars[j] > 0) {
|
while (currentChars[j] > 0) {
|
||||||
matched = (currentChars[j] == lowerC || currentChars[j] == c);
|
const bool matched = (currentChars[j] == lowerC || currentChars[j] == c);
|
||||||
// If skipPos is defined, not to search proximity collections.
|
// If skipPos is defined, not to search proximity collections.
|
||||||
// First char is what user typed.
|
// First char is what user typed.
|
||||||
if (matched) {
|
if (matched) {
|
||||||
|
|
|
@ -38,24 +38,22 @@ private:
|
||||||
int getAddress(int *pos);
|
int getAddress(int *pos);
|
||||||
int getFreq(int *pos);
|
int getFreq(int *pos);
|
||||||
int wideStrLen(unsigned short *str);
|
int wideStrLen(unsigned short *str);
|
||||||
|
|
||||||
bool sameAsTyped(unsigned short *word, int length);
|
bool sameAsTyped(unsigned short *word, int length);
|
||||||
bool addWord(unsigned short *word, int length, int frequency);
|
bool addWord(unsigned short *word, int length, int frequency);
|
||||||
unsigned short toLowerCase(unsigned short c);
|
unsigned short toLowerCase(unsigned short c);
|
||||||
void getWordsRec(int pos, int depth, int maxDepth, bool completion, int snr,
|
void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth,
|
||||||
int inputIndex, int diffs, int skipPos, int *nextLetters, int nextLettersSize);
|
const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs,
|
||||||
|
const int skipPos, int *nextLetters, const int nextLettersSize);
|
||||||
|
void getWords(const int initialPos, const int inputLength, const int skipPos, int *nextLetters,
|
||||||
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
|
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
||||||
const int nextLettersSize, const int skipPos, const int freq);
|
const int nextLettersSize, const int skipPos, const int freq);
|
||||||
|
|
||||||
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
||||||
const int snr, const int skipPos, const int freq, const int addedWeight);
|
const int snr, const int skipPos, const int freq, const int addedWeight);
|
||||||
|
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const unsigned short userTypedChar, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
|
|
||||||
int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
|
int getMatchedProximityId(const int *currentChars, const unsigned short lowerC,
|
||||||
const unsigned short c, const int skipPos);
|
const unsigned short c, const int skipPos);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue